From 267e8b67c210eaccebf35dbaaf9e749b3d1a0246 Mon Sep 17 00:00:00 2001
From: "jiafeng.zhang"
Date: Thu, 28 Apr 2022 15:22:34 +0800
Subject: [PATCH] [refactor][doc]The new version of the document is online
 (#9272)

replace the `docs/` with `new-docs/`
---
 docs/.markdownlintignore | 2 -
 docs/.vuepress/sidebar/en.js | 1466 +-
 docs/.vuepress/sidebar/zh-CN.js | 899 +-
 docs/en/README.md | 2 +-
 .../cluster-management/elastic-expansion.md | 0
 .../cluster-management/load-balancing.md | 0
 .../cluster-management/upgrade.md | 0
 .../en/admin-manual/config/be-config.md | 0
 .../en/admin-manual/config/fe-config.md | 0
 .../en/admin-manual/config/user-property.md | 0
 .../en/admin-manual/data-admin/backup.md | 0
 .../admin-manual/data-admin/delete-recover.md | 0
 .../en/admin-manual/data-admin/restore.md | 0
 .../http-actions/cancel-label.md | 0
 .../http-actions/check-reset-rpc-cache.md | 0
 .../http-actions/compaction-action.md | 0
 .../http-actions/connection-action.md | 0
 .../http-actions/fe-get-log-file.md | 0
 .../http-actions/fe/backends-action.md | 0
 .../http-actions/fe/bootstrap-action.md | 0
 .../http-actions/fe/cancel-load-action.md | 0
 .../fe/check-decommission-action.md | 0
 .../fe/check-storage-type-action.md | 0
 .../http-actions/fe/config-action.md | 0
 .../http-actions/fe/connection-action.md | 0
 .../http-actions/fe/get-ddl-stmt-action.md | 0
 .../http-actions/fe/get-load-info-action.md | 0
 .../http-actions/fe/get-load-state.md | 0
 .../http-actions/fe/get-log-file-action.md | 0
 .../http-actions/fe/get-small-file.md | 0
 .../http-actions/fe/ha-action.md | 0
 .../http-actions/fe/hardware-info-action.md | 0
 .../http-actions/fe/health-action.md | 0
 .../http-actions/fe/log-action.md | 0
 .../http-actions/fe/logout-action.md | 0
 .../http-actions/fe/manager/cluster-action.md | 0
 .../http-actions/fe/manager/node-action.md | 0
 .../fe/manager/query-profile-action.md | 0
 .../http-actions/fe/meta-action.md | 0
 .../http-actions/fe/meta-info-action.md | 0
 .../fe/meta-replay-state-action.md | 0
 .../http-actions/fe/profile-action.md | 0
 .../http-actions/fe/query-detail-action.md | 0
 .../http-actions/fe/query-profile-action.md | 0
 .../http-actions/fe/row-count-action.md | 0
 .../http-actions/fe/session-action.md | 0
 .../http-actions/fe/set-config-action.md | 0
 .../http-actions/fe/show-data-action.md | 0
 .../http-actions/fe/show-meta-info-action.md | 0
 .../http-actions/fe/show-proc-action.md | 0
 .../fe/show-runtime-info-action.md | 0
 .../fe/statement-execution-action.md | 0
 .../http-actions/fe/system-action.md | 0
 .../fe/table-query-plan-action.md | 0
 .../http-actions/fe/table-row-count-action.md | 0
 .../http-actions/fe/table-schema-action.md | 0
 .../http-actions/fe/upload-action.md | 0
 .../http-actions/get-load-state.md | 0
 .../http-actions/get-tablets.md | 0
 .../http-actions/profile-action.md | 0
 .../http-actions/query-detail-action.md | 0
 .../http-actions/restore-tablet.md | 0
 .../http-actions/show-data-action.md | 0
 .../http-actions/tablet-migration-action.md | 0
 .../http-actions/tablets_distribution.md | 0
 .../maint-monitor/be-olap-error-code.md | 0
 .../maint-monitor/disk-capacity.md | 0
 .../maint-monitor}/doris-error-code.md | 0
 .../maint-monitor/metadata-operation.md | 0
 .../maint-monitor}/monitor-alert.md | 0
 .../monitor-metrics/be-metrics.md | 0
 .../monitor-metrics/fe-metrics.md | 0
 .../maint-monitor}/multi-tenant.md | 0
 .../maint-monitor/tablet-meta-tool.md | 0
 .../tablet-repair-and-balance.md | 0
 .../maint-monitor}/tablet-restore-tool.md | 0
 .../en/admin-manual/multi-tenant.md | 0
.../en/admin-manual/optimization.md | 0 .../en/admin-manual/privilege-ldap/ldap.md | 0 .../privilege-ldap/user-privilege.md | 0 .../en/admin-manual/query-profile.md | 0 .../en/admin-manual/sql-interception.md | 0 .../alter-table/alter-table-bitmap-index.md | 80 - .../alter-table/alter-table-replace-table.md | 72 - .../alter-table/alter-table-rollup.md | 188 - .../alter-table/alter-table-schema-change.md | 231 - .../alter-table/alter-table-temp-partition.md | 288 - docs/en/administrator-guide/backup-restore.md | 195 - .../block-rule/sql-block.md | 93 - docs/en/administrator-guide/broker.md | 289 - .../bucket-shuffle-join.md | 105 - .../en/administrator-guide/colocation-join.md | 409 - .../administrator-guide/config/be_config.md | 1526 - .../administrator-guide/config/fe_config.md | 2210 -- .../config/user_property.md | 73 - .../administrator-guide/dynamic-partition.md | 464 - docs/en/administrator-guide/export-manual.md | 198 - docs/en/administrator-guide/ldap.md | 175 - .../load-data/batch-delete-manual.md | 204 - .../load-data/binlog-load-manual.md | 523 - .../load-data/broker-load-manual.md | 536 - .../load-data/delete-manual.md | 194 - .../load-data/insert-into-manual.md | 297 - .../load-data/load-json-format.md | 467 - .../load-data/load-manual.md | 228 - .../load-data/routine-load-manual.md | 334 - .../load-data/s3-load-manual.md | 93 - .../load-data/sequence-column-manual.md | 208 - .../load-data/spark-load-manual.md | 632 - .../load-data/stream-load-manual.md | 374 - .../administrator-guide/materialized_view.md | 486 - docs/en/administrator-guide/multi-tenant.md | 222 - .../operation/be-olap-error-code.md | 256 - .../operation/disk-capacity.md | 169 - .../operation/metadata-operation.md | 404 - .../operation/tablet-meta-tool.md | 145 - .../operation/tablet-repair-and-balance.md | 774 - docs/en/administrator-guide/outfile.md | 195 - docs/en/administrator-guide/privilege.md | 224 - docs/en/administrator-guide/query_cache.md | 138 - .../resource-management.md | 169 - .../en/administrator-guide/running-profile.md | 276 - docs/en/administrator-guide/runtime-filter.md | 284 - docs/en/administrator-guide/small-file-mgr.md | 104 - docs/en/administrator-guide/sql-mode.md | 76 - docs/en/administrator-guide/time-zone.md | 98 - docs/en/administrator-guide/update.md | 126 - docs/en/administrator-guide/variables.md | 499 - .../en/advanced/alter-table/replace-table.md | 0 .../en/advanced/alter-table/schema-change.md | 0 .../en/advanced/best-practice/debug-log.md | 0 .../advanced/best-practice/import-analysis.md | 0 .../advanced/best-practice/query-analysis.md | 0 {new-docs => docs}/en/advanced/broker.md | 0 .../en/advanced/cache/partition-cache.md | 0 .../en/advanced/cache/query-cache.md | 0 .../join-optimization/bucket-shuffle-join.md | 0 .../join-optimization/colocation-join.md | 0 .../join-optimization/runtime-filter.md | 0 .../en/advanced/materialized-view.md | 0 .../orthogonal-bitmap-manual.md | 0 .../en/advanced/orthogonal-hll-manual.md | 0 .../advanced/partition/dynamic-partition.md | 0 .../partition/table-temp-partition.md | 0 {new-docs => docs}/en/advanced/resource.md | 0 .../en/advanced/small-file-mgr.md | 0 {new-docs => docs}/en/advanced/time-zone.md | 0 {new-docs => docs}/en/advanced/variables.md | 0 .../vectorized-execution-engine.md | 0 docs/en/benchmark/samples.md | 56 - {new-docs => docs}/en/benchmark/ssb.md | 0 docs/en/benchmark/star-schema-benchmark.md | 181 - docs/en/benchmark/systemd.md | 31 - {new-docs => docs}/en/benchmark/tpc-h.md | 0 .../commit-format-specification.md | 6 
+- .../release-and-verify/release-complete.md | 6 +- .../en/data-operate/export/export-manual.md | 0 .../export}/export_with_mysql_dump.md | 0 .../en/data-operate/export/outfile.md | 0 .../import-scenes/external-storage-load.md | 0 .../import-scenes/external-table-load.md | 0 .../import/import-scenes/jdbc-load.md | 0 .../import/import-scenes/kafka-load.md | 0 .../import/import-scenes/load-atomicity.md | 0 .../import/import-scenes/load-data-convert.md | 0 .../import/import-scenes/load-strict-mode.md | 0 .../import/import-scenes/local-file-load.md | 0 .../import/import-way/binlog-load-manual.md | 0 .../import/import-way/broker-load-manual.md | 0 .../import/import-way/insert-into-manual.md | 0 .../import/import-way/load-json-format.md | 0 .../import/import-way/routine-load-manual.md | 0 .../import/import-way/s3-load-manual.md | 0 .../import/import-way/spark-load-manual.md | 0 .../import/import-way/stream-load-manual.md | 0 .../en/data-operate/import/load-manual.md | 0 .../update-delete/batch-delete-manual.md | 0 .../update-delete/delete-manual.md | 0 .../update-delete/sequence-column-manual.md | 0 .../en/data-operate/update-delete/update.md | 0 .../en/data-table/advance-usage.md | 0 .../en/data-table/basic-usage.md | 0 .../en/data-table/best-practice.md | 0 .../en/data-table/data-model.md | 0 .../en/data-table/data-partition.md | 0 .../en/data-table/hit-the-rollup.md | 0 .../en/data-table/index/bitmap-index.md | 0 .../index}/bloomfilter.md | 0 .../en/data-table/index/prefix-index.md | 0 docs/en/developer-guide/be-vscode-dev.md | 4 +- docs/en/developer-guide/benchmark-tool.md | 8 +- .../en/developer-guide/cpp-diagnostic-code.md | 2 +- docs/en/developer-guide/fe-idea-dev.md | 10 +- docs/en/developer-guide/fe-vscode-dev.md | 2 +- .../audit-plugin.md | 0 {new-docs => docs}/en/ecosystem/datax.md | 0 .../doris-manager/cluster-managenent.md | 69 + .../doris-manager/compiling-deploying.md | 112 + .../ecosystem/doris-manager/initializing.md | 43 + docs/en/ecosystem/doris-manager/space-list.md | 234 + .../doris-manager/space-management.md | 53 + .../doris-manager/system-settings.md | 91 + .../ecosystem/external-table/doris-on-es.md | 0 .../ecosystem/external-table/hive-of-doris.md | 0 .../external-table/iceberg-of-doris.md | 0 .../ecosystem/external-table/odbc-of-doris.md | 0 .../en/ecosystem/flink-doris-connector.md | 0 {new-docs => docs}/en/ecosystem/logstash.md | 0 .../plugin-development-manual.md | 0 .../en/ecosystem/seatunnel/flink-sink.md | 0 .../en/ecosystem/seatunnel/spark-sink.md | 0 .../en/ecosystem/spark-doris-connector.md | 0 .../en/ecosystem/udf/contribute-udf.md | 0 .../udf/native-user-defined-function.md | 0 .../udf/remote-user-defined-function.md | 0 docs/en/extending-doris/datax.md | 104 - docs/en/extending-doris/doris-on-es.md | 589 - .../extending-doris/flink-doris-connector.md | 496 - docs/en/extending-doris/hive-bitmap-udf.md | 97 - docs/en/extending-doris/hive-of-doris.md | 117 - docs/en/extending-doris/iceberg-of-doris.md | 211 - docs/en/extending-doris/logstash.md | 198 - docs/en/extending-doris/odbc-of-doris.md | 374 - .../extending-doris/seatunnel/flink-sink.md | 116 - .../extending-doris/seatunnel/spark-sink.md | 123 - .../extending-doris/spark-doris-connector.md | 286 - docs/en/extending-doris/udf/contribute-udf.md | 124 - .../udf/java-user-defined-function.md | 89 - .../udf/native-user-defined-function.md | 264 - .../udf/remote-user-defined-function.md | 109 - {new-docs => docs}/en/faq/data-faq.md | 0 docs/en/faq/error.md | 153 - docs/en/faq/faq.md | 297 - {new-docs => 
docs}/en/faq/install-faq.md | 0 {new-docs => docs}/en/faq/sql-faq.md | 0 .../en/get-starting/get-starting.md | 4 +- docs/en/getting-started/advance-usage.md | 280 - docs/en/getting-started/basic-usage.md | 382 - docs/en/getting-started/best-practice.md | 198 - docs/en/getting-started/data-model-rollup.md | 636 - docs/en/getting-started/data-partition.md | 398 - docs/en/getting-started/hit-the-rollup.md | 298 - .../en/install/install-deploy.md | 0 .../install/source-install/compilation-arm.md | 0 .../compilation-with-ldb-toolchain.md | 0 .../en/install/source-install/compilation.md | 0 docs/en/installing/compilation-arm.md | 258 - docs/en/installing/compilation.md | 263 - docs/en/installing/install-deploy.md | 490 - docs/en/installing/upgrade.md | 86 - .../internal/Flink doris connector Design.md | 259 - .../en/internal/doris_storage_optimization.md | 235 - docs/en/internal/grouping_sets_design.md | 501 - docs/en/internal/metadata-design.md | 127 - .../approx_count_distinct.md | 0 .../sql-functions/aggregate-functions/avg.md | 0 .../aggregate-functions/bitmap_union.md | 0 .../aggregate-functions/count.md | 0 .../aggregate-functions/group_concat.md | 0 .../aggregate-functions/hll_union_agg.md | 0 .../sql-functions/aggregate-functions/max.md | 0 .../aggregate-functions/max_by.md | 0 .../sql-functions/aggregate-functions/min.md | 0 .../aggregate-functions/min_by.md | 0 .../aggregate-functions/percentile.md | 0 .../aggregate-functions/percentile_approx.md | 0 .../aggregate-functions/stddev.md | 0 .../aggregate-functions/stddev_samp.md | 0 .../sql-functions/aggregate-functions/sum.md | 0 .../sql-functions/aggregate-functions/topn.md | 0 .../aggregate-functions/var_samp.md | 0 .../aggregate-functions/variance.md | 0 .../bitmap-functions/bitmap_and.md | 0 .../bitmap-functions/bitmap_and_count.md | 0 .../bitmap-functions/bitmap_and_not.md | 0 .../bitmap-functions/bitmap_and_not_count.md | 0 .../bitmap-functions/bitmap_contains.md | 0 .../bitmap-functions/bitmap_empty.md | 0 .../bitmap-functions/bitmap_from_string.md | 0 .../bitmap-functions/bitmap_has_all.md | 0 .../bitmap-functions/bitmap_has_any.md | 0 .../bitmap-functions/bitmap_hash.md | 0 .../bitmap-functions/bitmap_intersect.md | 0 .../bitmap-functions/bitmap_max.md | 0 .../bitmap-functions/bitmap_min.md | 0 .../bitmap-functions/bitmap_not.md | 0 .../bitmap-functions/bitmap_or.md | 0 .../bitmap-functions/bitmap_or_count.md | 0 .../bitmap_subset_in_range.md | 0 .../bitmap-functions/bitmap_subset_limit.md | 0 .../bitmap-functions/bitmap_to_string.md | 0 .../bitmap-functions/bitmap_union.md | 0 .../bitmap-functions/bitmap_xor.md | 0 .../bitmap-functions/bitmap_xor_count.md | 0 .../orthogonal_bitmap_intersect.md | 0 .../orthogonal_bitmap_intersect_count.md | 0 .../orthogonal_bitmap_union_count.md | 0 .../bitmap-functions/sub_bitmap.md | 0 .../bitmap-functions/to_bitmap.md | 0 .../sql-functions/bitwise-functions/bitand.md | 0 .../sql-functions/bitwise-functions/bitnot.md | 0 .../sql-functions/bitwise-functions/bitor.md | 0 .../sql-functions/bitwise-functions/bitxor.md | 0 .../sql-functions/cast.md | 0 .../conditional-functions/case.md | 0 .../conditional-functions/coalesce.md | 0 .../sql-functions/conditional-functions/if.md | 0 .../conditional-functions/ifnull.md | 0 .../conditional-functions/nullif.md | 0 .../date-time-functions/convert_tz.md | 0 .../date-time-functions/curdate.md | 0 .../date-time-functions/current_timestamp.md | 0 .../date-time-functions/curtime.md | 0 .../date-time-functions/date_add.md | 0 
.../date-time-functions/date_format.md | 0 .../date-time-functions/date_sub.md | 0 .../date-time-functions/datediff.md | 0 .../sql-functions/date-time-functions/day.md | 0 .../date-time-functions/dayname.md | 0 .../date-time-functions/dayofmonth.md | 0 .../date-time-functions/dayofweek.md | 0 .../date-time-functions/dayofyear.md | 0 .../date-time-functions/from_days.md | 0 .../date-time-functions/from_unixtime.md | 0 .../sql-functions/date-time-functions/hour.md | 0 .../date-time-functions/makedate.md | 0 .../date-time-functions/minute.md | 0 .../date-time-functions/month.md | 0 .../date-time-functions/monthname.md | 0 .../sql-functions/date-time-functions/now.md | 0 .../date-time-functions/second.md | 0 .../date-time-functions/str_to_date.md | 0 .../date-time-functions/time_round.md | 0 .../date-time-functions/timediff.md | 0 .../date-time-functions/timestampadd.md | 0 .../date-time-functions/timestampdiff.md | 0 .../date-time-functions/to_date.md | 0 .../date-time-functions/to_days.md | 0 .../date-time-functions/unix_timestamp.md | 0 .../date-time-functions/utc_timestamp.md | 0 .../sql-functions/date-time-functions/week.md | 0 .../date-time-functions/weekday.md | 0 .../date-time-functions/weekofyear.md | 0 .../sql-functions/date-time-functions/year.md | 0 .../date-time-functions/yearweek.md | 0 .../sql-functions/digital-masking.md | 0 .../encrypt-digest-functions/aes.md | 0 .../encrypt-digest-functions/md5.md | 0 .../encrypt-digest-functions/md5sum.md | 0 .../encrypt-digest-functions/sm3.md | 0 .../encrypt-digest-functions/sm3sum.md | 0 .../encrypt-digest-functions/sm4.md | 0 .../encrypt-dixgest-functions}/aes.md | 0 .../encrypt-dixgest-functions}/md5.md | 0 .../encrypt-dixgest-functions}/md5sum.md | 0 .../encrypt-dixgest-functions}/sm3.md | 0 .../encrypt-dixgest-functions}/sm3sum.md | 0 .../encrypt-dixgest-functions}/sm4.md | 0 .../hash-functions/murmur_hash3_32.md | 0 .../json-functions/get_json_double.md | 0 .../json-functions/get_json_int.md | 0 .../json-functions/get_json_string.md | 0 .../json-functions/json_array.md | 0 .../json-functions/json_object.md | 0 .../json-functions/json_quote.md | 0 .../sql-functions/math-functions/conv.md | 0 .../sql-functions/math-functions/pmod.md | 0 .../spatial-functions/st_astext.md | 0 .../spatial-functions/st_circle.md | 0 .../spatial-functions/st_contains.md | 0 .../spatial-functions/st_distance_sphere.md | 0 .../spatial-functions/st_geometryfromtext.md | 0 .../spatial-functions/st_linefromtext.md | 0 .../spatial-functions/st_point.md | 0 .../spatial-functions/st_polygon.md | 0 .../sql-functions/spatial-functions/st_x.md | 0 .../sql-functions/spatial-functions/st_y.md | 0 .../append_trailing_char_if_absent.md | 0 .../sql-functions/string-functions/ascii.md | 0 .../string-functions/bit_length.md | 0 .../string-functions/char_length.md | 0 .../sql-functions/string-functions/concat.md | 0 .../string-functions/concat_ws.md | 0 .../string-functions/ends_with.md | 0 .../string-functions/find_in_set.md | 0 .../sql-functions/string-functions/hex.md | 0 .../sql-functions/string-functions/instr.md | 0 .../sql-functions/string-functions/lcase.md | 0 .../sql-functions/string-functions/left.md | 0 .../sql-functions/string-functions/length.md | 0 .../string-functions/like/like.md | 0 .../string-functions/like/not_like.md | 0 .../sql-functions/string-functions/locate.md | 0 .../sql-functions/string-functions/lower.md | 0 .../sql-functions/string-functions/lpad.md | 0 .../sql-functions/string-functions/ltrim.md | 0 .../string-functions/money_format.md | 0 
.../string-functions/null_or_empty.md | 0 .../string-functions/regexp/not_regexp.md | 0 .../string-functions/regexp/regexp.md | 0 .../string-functions/regexp/regexp_extract.md | 0 .../string-functions/regexp/regexp_replace.md | 0 .../sql-functions/string-functions/repeat.md | 0 .../sql-functions/string-functions/replace.md | 0 .../sql-functions/string-functions/reverse.md | 0 .../sql-functions/string-functions/right.md | 0 .../sql-functions/string-functions/rpad.md | 0 .../string-functions/split_part.md | 0 .../string-functions/starts_with.md | 0 .../sql-functions/string-functions/strleft.md | 0 .../string-functions/strright.md | 0 .../string-functions/substring.md | 0 .../sql-functions/string-functions/unhex.md | 0 .../table-functions/explode-bitmap.md | 0 .../table-functions/explode-json-array.md | 0 .../table-functions/explode-numbers.md | 0 .../table-functions/explode-split.md | 0 .../table-functions/outer-combinator.md | 0 .../sql-functions/window-function.md | 0 .../CREATE-ROLE.md | 0 .../CREATE-USER.md | 0 .../DROP-ROLE.md | 0 .../DROP-USER.md | 0 .../Account-Management-Statements/GRANT.md | 0 .../Account-Management-Statements/LDAP.md | 0 .../Account-Management-Statements/REVOKE.md | 0 .../SET-PASSWORD.md | 0 .../SET-PROPERTY.md | 0 .../ALTER-SYSTEM-ADD-BACKEND.md | 0 .../ALTER-SYSTEM-ADD-BROKER.md | 0 .../ALTER-SYSTEM-ADD-FOLLOWER.md | 0 .../ALTER-SYSTEM-ADD-OBSERVER.md | 0 .../ALTER-SYSTEM-DECOMMISSION-BACKEND.md | 0 .../ALTER-SYSTEM-DROP-BACKEND.md | 0 .../ALTER-SYSTEM-DROP-BROKER.md | 0 .../ALTER-SYSTEM-DROP-FOLLOWER.md | 0 .../ALTER-SYSTEM-DROP-OBSERVER.md | 0 .../ALTER-SYSTEM-MODIFY-BACKEND.md | 0 .../ALTER-SYSTEM-MODIFY-BROKER.md | 0 .../CANCEL-ALTER-SYSTEM.md | 0 .../Alter/ALTER-DATABASE.md | 0 .../Alter/ALTER-SQL-BLOCK-RULE.md | 0 .../Alter/ALTER-TABLE-BITMAP.md | 0 .../Alter/ALTER-TABLE-COLUMN.md | 0 .../Alter/ALTER-TABLE-PARTITION.md | 0 .../Alter/ALTER-TABLE-PROPERTY.md | 0 .../Alter/ALTER-TABLE-RENAME.md | 0 .../Alter/ALTER-TABLE-REPLACE.md | 0 .../Alter/ALTER-TABLE-ROLLUP.md | 0 .../Alter/ALTER-VIEW.md | 0 .../Alter/CANCEL-ALTER-TABLE.md | 0 .../Backup-and-Restore/BACKUP.md | 0 .../Backup-and-Restore/CANCEL-BACKUP.md | 0 .../Backup-and-Restore/CANCEL-RESTORE.md | 0 .../Backup-and-Restore/CREATE-REPOSITORY.md | 0 .../Backup-and-Restore/DROP-REPOSITORY.md | 0 .../Backup-and-Restore/RECOVER.md | 0 .../Backup-and-Restore/RESTORE.md | 0 .../Create/CREATE-DATABASE.md | 0 .../Create/CREATE-ENCRYPT-KEY.md | 0 .../Create/CREATE-EXTERNAL-TABLE.md | 0 .../Create/CREATE-FILE.md | 0 .../Create/CREATE-FUNCTION.md | 0 .../Create/CREATE-INDEX.md | 0 .../Create/CREATE-MATERIALIZED-VIEW.md | 0 .../Create/CREATE-RESOURCE.md | 0 .../Create/CREATE-SQL-BLOCK-RULE.md | 0 .../Create/CREATE-TABLE-LIKE.md | 0 .../Create/CREATE-TABLE.md | 0 .../Create/CREATE-VIEW.md | 0 .../Drop/DROP-DATABASE.md | 0 .../Drop/DROP-ENCRYPT-KEY.md | 0 .../Drop/DROP-FILE.md | 0 .../Drop/DROP-FUNCTION.md | 0 .../Drop/DROP-INDEX.md | 0 .../Drop/DROP-MATERIALIZED-VIEW.md | 0 .../Drop/DROP-RESOURCE.md | 0 .../Drop/DROP-SQL-BLOCK-RULE.md | 0 .../Drop/DROP-TABLE.md | 0 .../Drop/TRUNCATE-TABLE.md | 0 .../Load/ALTER-ROUTINE-LOAD.md | 0 .../Load/BROKER-LOAD.md | 0 .../Load/CANCEL-LOAD.md | 0 .../Load/CREATE-ROUTINE-LOAD.md | 0 .../Load/CREATE-SYNC-JOB.md | 0 .../Load/MULTI-LOAD.md | 0 .../Load/PAUSE-ROUTINE-LOAD.md | 0 .../Load/PAUSE-SYNC-JOB.md | 0 .../Load/RESUME-ROUTINE-LOAD.md | 0 .../Load/RESUME-SYNC-JOB.md | 0 .../Load/STOP-ROUTINE-LOAD.md | 0 .../Load/STOP-SYNC-JOB.md | 0 .../Load/STREAM-LOAD.md | 0 
.../Manipulation/DELETE.md | 0 .../Manipulation/INSERT.md | 0 .../Manipulation/UPDATE.md | 0 .../Data-Manipulation-Statements/OUTFILE.md | 0 .../sql-reference-v2/Data-Types}/BIGINT.md | 0 .../sql-reference-v2/Data-Types}/BITMAP.md | 0 .../sql-reference-v2/Data-Types}/BOOLEAN.md | 0 .../sql-reference-v2/Data-Types}/CHAR.md | 0 .../sql-reference-v2/Data-Types}/DATE.md | 0 .../sql-reference-v2/Data-Types}/DATETIME.md | 0 .../sql-reference-v2/Data-Types}/DECIMAL.md | 0 .../sql-reference-v2/Data-Types}/DOUBLE.md | 0 .../sql-reference-v2/Data-Types}/FLOAT.md | 0 .../sql-reference-v2/Data-Types}/HLL.md | 0 .../sql-reference-v2/Data-Types}/INT.md | 0 .../Data-Types}/QUANTILE_STATE.md | 0 .../sql-reference-v2/Data-Types}/SMALLINT.md | 0 .../sql-reference-v2/Data-Types}/STRING.md | 0 .../sql-reference-v2/Data-Types}/TINYINT.md | 0 .../sql-reference-v2/Data-Types}/VARCHAR.md | 0 .../ADMIN-CANCEL-REPAIR.md | 0 .../ADMIN-CHECK-TABLET.md | 0 .../ADMIN-CLEAN-TRASH.md | 0 .../ADMIN-REPAIR-TABLE.md | 0 .../ADMIN-SET-CONFIG.md | 0 .../ADMIN-SET-REPLICA-STATUS.md | 0 .../ADMIN-SHOW-CONFIG.md | 0 .../ADMIN-SHOW-REPLICA-DISTRIBUTION.md | 0 .../ADMIN-SHOW-REPLICA-STATUS.md | 0 .../ADMIN-SHOW-TABLET-STORAGE-FORMAT.md} | 0 .../ENABLE-FEATURE.md | 0 .../INSTALL-PLUGIN.md | 0 .../KILL.md | 0 .../RECOVER.md | 0 .../SET-VARIABLE.md | 0 .../UNINSTALL-PLUGIN.md | 0 .../SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md | 0 .../Show-Statements/SHOW-ALTER.md | 0 .../Show-Statements/SHOW-BACKENDS.md | 0 .../Show-Statements/SHOW-BACKUP.md | 0 .../Show-Statements/SHOW-BROKER.md | 0 .../Show-Statements/SHOW-CHARSET.md | 0 .../Show-Statements/SHOW-COLLATION.md | 0 .../Show-Statements/SHOW-COLUMNS.md | 0 .../Show-Statements/SHOW-CREATE-DATABASE.md | 0 .../Show-Statements/SHOW-CREATE-FUNCTION.md | 0 .../SHOW-CREATE-ROUTINE-LOAD.md | 0 .../Show-Statements/SHOW-CREATE-TABLE.md | 0 .../Show-Statements/SHOW-DATA.md | 0 .../Show-Statements/SHOW-DATABASE-ID.md | 0 .../Show-Statements/SHOW-DATABASES.md | 0 .../Show-Statements/SHOW-DELETE.md | 0 .../Show-Statements/SHOW-DYNAMIC-PARTITION.md | 0 .../Show-Statements/SHOW-ENCRYPT-KEY.md | 0 .../Show-Statements/SHOW-ENGINES.md | 0 .../Show-Statements/SHOW-EVENTS.md | 0 .../Show-Statements/SHOW-EXPORT.md | 0 .../Show-Statements/SHOW-FILE.md | 0 .../Show-Statements/SHOW-FRONTENDS.md | 0 .../Show-Statements/SHOW-FUNCTIONS.md | 0 .../Show-Statements/SHOW-GRANTS.md | 0 .../Show-Statements/SHOW-INDEX.md | 0 .../Show-Statements/SHOW-LAST-INSERT.md | 0 .../Show-Statements/SHOW-LOAD-PROFILE.md | 0 .../Show-Statements/SHOW-LOAD-WARNINGS.md | 0 .../Show-Statements/SHOW-LOAD.md | 0 .../Show-Statements/SHOW-MIGRATIONS.md | 0 .../Show-Statements/SHOW-OPEN-TABLES.md | 0 .../Show-Statements/SHOW-PARTITION-ID.md | 0 .../Show-Statements/SHOW-PARTITIONS.md | 0 .../Show-Statements/SHOW-PLUGINS.md | 0 .../Show-Statements/SHOW-PROC.md | 0 .../Show-Statements/SHOW-PROCEDURE.md | 0 .../Show-Statements/SHOW-PROCESSLIST.md | 0 .../Show-Statements/SHOW-PROPERTY.md | 0 .../Show-Statements/SHOW-QUERY-PROFILE.md | 0 .../Show-Statements/SHOW-REPOSITORIES.md | 0 .../Show-Statements/SHOW-RESOURCES.md | 0 .../Show-Statements/SHOW-RESTORE.md | 0 .../Show-Statements/SHOW-ROLES.md | 0 .../Show-Statements/SHOW-ROLLUP.md | 0 .../Show-Statements/SHOW-ROUTINE-LOAD-TASK.md | 0 .../Show-Statements/SHOW-ROUTINE-LOAD.md | 0 .../Show-Statements/SHOW-SMALL-FILES.md | 0 .../Show-Statements/SHOW-SNAPSHOT.md | 0 .../Show-Statements/SHOW-SQL-BLOCK-RULE.md | 0 .../Show-Statements/SHOW-STATUS.md | 0 .../Show-Statements/SHOW-STREAM-LOAD.md | 0 
.../Show-Statements/SHOW-SYNC-JOB.md | 0 .../Show-Statements/SHOW-TABLE-ID.md | 0 .../Show-Statements/SHOW-TABLE-STATUS.md | 0 .../Show-Statements/SHOW-TABLET.md | 0 .../Show-Statements/SHOW-TRANSACTION.md | 0 .../Show-Statements/SHOW-TRASH.md | 0 .../Show-Statements/SHOW-TRIGGERS.md | 0 .../Show-Statements/SHOW-USER.md | 0 .../Show-Statements/SHOW-VARIABLES.md | 0 .../Show-Statements/SHOW-VIEW.md | 0 .../Show-Statements/SHOW-WARNING.md | 0 .../Show-Statements/SHOW-WHITE-LIST.md | 0 .../Utility-Statements/DESCRIBE.md | 0 .../Utility-Statements/HELP.md | 0 .../Utility-Statements/USE.md | 0 .../ALTER-USER.md | 38 - .../CREATE-ROLE.md | 38 - .../CREATE-USER.md | 38 - .../DROP-ROLE.md | 38 - .../DROP-USER.md | 38 - .../Account-Management-Statements/GRANT.md | 38 - .../Account-Management-Statements/LDAP.md | 38 - .../Account-Management-Statements/REVOKE.md | 38 - .../SET-PASSWORD.md | 38 - .../SET-PROPERTY.md | 38 - .../ALTER-SYSTEM-ADD-BACKEND.md | 38 - .../ALTER-SYSTEM-ADD-FOLLOWER.md | 38 - .../ALTER-SYSTEM-ADD-OBSERVER.md | 38 - .../ALTER-SYSTEM-DECOMMISSION-BACKEND.md | 38 - .../ALTER-SYSTEM-DROP-BACKEND.md | 38 - .../ALTER-SYSTEM-DROP-FOLLOWER.md | 38 - .../ALTER-SYSTEM-DROP-OBSERVER.md | 38 - .../CANCEL-ALTER-SYSTEM.md | 38 - .../Alter/ALTER-DATABASE.md | 38 - .../Alter/ALTER-TABLE-COLUMN.md | 38 - .../Alter/ALTER-TABLE-PARTITION.md | 38 - .../Alter/ALTER-TABLE-PROPERTY.md | 38 - .../Alter/ALTER-TABLE-RENAME.md | 38 - .../Alter/ALTER-TABLE-REPLACE.md | 38 - .../Alter/ALTER-TABLE-ROLLUP.md | 38 - .../Alter/ALTER-VIEW.md | 38 - .../Alter/CANCEL-ALTER-TABLE.md | 38 - .../Backup-and-Restore/BACKUP.md | 38 - .../Backup-and-Restore/CANCEL-BACKUP.md | 38 - .../Backup-and-Restore/CANCEL-RESTORE.md | 38 - .../Backup-and-Restore/CREATE-REPOSITORY.md | 38 - .../Backup-and-Restore/DROP-REPOSITORY.md | 38 - .../Backup-and-Restore/RESTORE.md | 38 - .../Create/CREATE-DATABASE.md | 38 - .../Create/CREATE-ENCRYPT-KEY.md | 38 - .../Create/CREATE-FILE.md | 38 - .../Create/CREATE-FUNCTION.md | 38 - .../Create/CREATE-INDEX.md | 38 - .../Create/CREATE-MATERIALIZED-VIEW.md | 38 - .../Create/CREATE-RESOURCE.md | 38 - .../Create/CREATE-TABLE-LIKE.md | 38 - .../Create/CREATE-TABLE.md | 568 - .../Create/CREATE-VIEW.md | 38 - .../Drop/DROP-DATABASE.md | 38 - .../Drop/DROP-ENCRYPT-KEY.md | 38 - .../Drop/DROP-FILE.md | 38 - .../Drop/DROP-FUNCTION.md | 38 - .../Drop/DROP-INDEX.md | 38 - .../Drop/DROP-MATERIALIZED-VIEW.md | 38 - .../Drop/DROP-RESOURCE.md | 38 - .../Drop/DROP-TABLE.md | 38 - .../Drop/TRUNCATE-TABLE.md | 38 - .../Load/ALTER-ROUTINE-LOAD.md | 38 - .../Load/BROKER-LOAD.md | 38 - .../Load/CANCEL-LOAD.md | 38 - .../Load/CREATE-ROUTINE-LOAD.md | 38 - .../Load/PAUSE-ROUTINE-LOAD.md | 38 - .../Load/RESUME-ROUTINE-LOAD.md | 38 - .../Load/STOP-ROUTINE-LOAD.md | 38 - .../Load/STREAM-LOAD.md | 38 - .../Manipulation/DELETE.md | 38 - .../Manipulation/INSERT.md | 38 - .../Manipulation/UPDATE.md | 38 - .../ADMIN-CANCEL-REPAIR.md | 38 - .../ADMIN-CHECK-TABLET.md | 38 - .../ADMIN-REPAIR-TABLE.md | 38 - .../ADMIN-SET-CONFIG.md | 38 - .../ADMIN-SET-REPLICA-STATUS.md | 38 - .../ADMIN-SHOW-CONFIG.md | 38 - .../ADMIN-SHOW-REPLICA-DISTRIBUTION.md | 38 - .../ADMIN-SHOW-REPLICA-STATUS.md | 38 - .../INSTALL-PLUGIN.md | 38 - .../KILL.md | 38 - .../RECOVER.md | 38 - .../SET-VARIABLE.md | 38 - .../UNINSTALL-PLUGIN.md | 38 - .../Show-Statements/SHOW-ALTER.md | 38 - .../Show-Statements/SHOW-BACKENDS.md | 38 - .../Show-Statements/SHOW-BACKUP.md | 38 - .../Show-Statements/SHOW-BROKER.md | 38 - .../Show-Statements/SHOW-COLUMNS.md | 38 - 
.../Show-Statements/SHOW-CREATE-DATABASE.md | 38 - .../Show-Statements/SHOW-CREATE-FUNCTION.md | 38 - .../SHOW-CREATE-ROUTINE-LOAD.md | 38 - .../Show-Statements/SHOW-CREATE-TABLE.md | 38 - .../Show-Statements/SHOW-DATA.md | 38 - .../Show-Statements/SHOW-DATABASE-ID.md | 38 - .../Show-Statements/SHOW-DATABASES.md | 38 - .../Show-Statements/SHOW-DELETE.md | 38 - .../Show-Statements/SHOW-DYNAMIC-PARTITION.md | 38 - .../Show-Statements/SHOW-ENCRYPT-KEY.md | 38 - .../Show-Statements/SHOW-EXPORT.md | 38 - .../Show-Statements/SHOW-FRONTENDS.md | 38 - .../Show-Statements/SHOW-FUNCTIONS.md | 38 - .../Show-Statements/SHOW-GRANTS.md | 38 - .../Show-Statements/SHOW-INDEX.md | 38 - .../Show-Statements/SHOW-LOAD-PROFILE.md | 38 - .../Show-Statements/SHOW-LOAD-WARNINGS.md | 38 - .../Show-Statements/SHOW-LOAD.md | 38 - .../Show-Statements/SHOW-MIGRATIONS.md | 38 - .../Show-Statements/SHOW-PARTITION-ID.md | 38 - .../Show-Statements/SHOW-PARTITIONS.md | 38 - .../Show-Statements/SHOW-PLUGINS.md | 38 - .../Show-Statements/SHOW-PROC.md | 38 - .../Show-Statements/SHOW-PROCESSLIST.md | 38 - .../Show-Statements/SHOW-PROPERTY.md | 83 - .../Show-Statements/SHOW-REPOSITORIES.md | 38 - .../Show-Statements/SHOW-RESOURCES.md | 38 - .../Show-Statements/SHOW-RESTORE.md | 38 - .../Show-Statements/SHOW-ROLES.md | 38 - .../Show-Statements/SHOW-ROUTINE-LOAD-TASK.md | 38 - .../Show-Statements/SHOW-ROUTINE-LOAD.md | 38 - .../Show-Statements/SHOW-SMALL-FILES.md | 38 - .../Show-Statements/SHOW-SNAPSHOT.md | 38 - .../Show-Statements/SHOW-STATUS.md | 38 - .../Show-Statements/SHOW-STREAM-LOAD.md | 38 - .../Show-Statements/SHOW-TABLE-ID.md | 38 - .../Show-Statements/SHOW-TABLE-STATUS.md | 38 - .../Show-Statements/SHOW-TABLET.md | 38 - .../Show-Statements/SHOW-TRANSACTION.md | 38 - .../Show-Statements/SHOW-VARIABLES.md | 38 - .../Show-Statements/SHOW-VIEW.md | 38 - .../Utility-Statements/DESCRIBE.md | 38 - .../sql-statements/Utility-Statements/HELP.md | 38 - .../sql-statements/Utility-Statements/USE.md | 38 - .../Account Management/CREATE ROLE.md | 45 - .../Account Management/CREATE USER.md | 74 - .../Account Management/DROP ROLE.md | 43 - .../Account Management/DROP USER.md | 49 - .../Account Management/GRANT.md | 81 - .../Account Management/REVOKE.md | 48 - .../Account Management/SET PASSWORD.md | 55 - .../Account Management/SET PROPERTY.md | 108 - .../Account Management/SHOW GRANTS.md | 56 - .../Account Management/SHOW ROLES.md | 41 - .../ADMIN CANCEL REBALANCE DISK.md | 51 - .../Administration/ADMIN CANCEL REPAIR.md | 47 - .../Administration/ADMIN CHECK TABLET.md | 57 - .../Administration/ADMIN CLEAN TRASH.md | 47 - .../Administration/ADMIN COMPACT.md | 52 - .../Administration/ADMIN REBALANCE DISK.md | 52 - .../Administration/ADMIN REPAIR.md | 52 - .../Administration/ADMIN SET CONFIG.md | 44 - .../ADMIN SET REPLICA STATUS.md | 62 - .../Administration/ADMIN SHOW CONFIG.md | 63 - .../ADMIN SHOW REPLICA DISTRIBUTION.md | 51 - .../ADMIN SHOW REPLICA STATUS.md | 64 - .../Administration/ADMIN-DIAGNOSE-TABLET.md | 59 - .../Administration/ALTER CLUSTER.md | 49 - .../Administration/ALTER SYSTEM.md | 141 - .../Administration/CANCEL DECOMMISSION.md | 40 - .../Administration/CREATE CLUSTER.md | 60 - .../Administration/CREATE FILE.md | 76 - .../Administration/DROP CLUSTER.md | 43 - .../Administration/DROP FILE.md | 51 - .../sql-statements/Administration/ENTER.md | 44 - .../Administration/INSTALL PLUGIN.md | 63 - .../Administration/LINK DATABASE.md | 49 - .../Administration/MIGRATE DATABASE.md | 45 - .../Administration/SET 
LDAP_ADMIN_PASSWORD.md | 45 - .../Administration/SHOW BACKENDS.md | 48 - .../Administration/SHOW BROKER.md | 40 - .../Administration/SHOW FILE.md | 52 - .../Administration/SHOW FRONTENDS.md | 43 - .../Administration/SHOW FULL COLUMNS.md | 42 - .../Administration/SHOW INDEX.md | 46 - .../Administration/SHOW MIGRATIONS.md | 37 - .../Administration/SHOW PLUGINS.md | 45 - .../Administration/SHOW TABLE STATUS.md | 55 - .../Administration/SHOW TRASH.md | 53 - .../Administration/SHOW VIEW.md | 46 - .../Administration/UNINSTALL PLUGIN.md | 47 - .../Data Definition/ALTER DATABASE.md | 56 - .../Data Definition/ALTER RESOURCE.md | 48 - .../Data Definition/ALTER TABLE.md | 445 - .../Data Definition/ALTER VIEW.md | 51 - .../sql-statements/Data Definition/BACKUP.md | 71 - .../Data Definition/CANCEL ALTER.md | 70 - .../Data Definition/CANCEL BACKUP.md | 39 - .../Data Definition/CANCEL RESTORE.md | 42 - .../Data Definition/CREATE DATABASE.md | 69 - .../Data Definition/CREATE ENCRYPTKEY.md | 80 - .../Data Definition/CREATE INDEX.md | 45 - .../CREATE MATERIALIZED VIEW.md | 238 - .../Data Definition/CREATE REPOSITORY.md | 87 - .../Data Definition/CREATE RESOURCE.md | 134 - .../Data Definition/CREATE TABLE LIKE.md | 78 - .../Data Definition/CREATE TABLE.md | 879 - .../Data Definition/CREATE VIEW.md | 68 - .../Data Definition/Colocate Join.md | 98 - .../Data Definition/DROP DATABASE.md | 43 - .../Data Definition/DROP ENCRYPTKEY.md | 55 - .../Data Definition/DROP INDEX.md | 37 - .../Data Definition/DROP MATERIALIZED VIEW.md | 110 - .../Data Definition/DROP REPOSITORY.md | 41 - .../Data Definition/DROP RESOURCE.md | 46 - .../Data Definition/DROP TABLE.md | 46 - .../Data Definition/DROP VIEW.md | 40 - .../sql-statements/Data Definition/HLL.md | 111 - .../sql-statements/Data Definition/RECOVER.md | 54 - .../Data Definition/REFRESH DATABASE.md | 45 - .../Data Definition/REFRESH TABLE.md | 45 - .../sql-statements/Data Definition/RESTORE.md | 87 - .../Data Definition/SHOW ENCRYPTKEYS.md | 68 - .../Data Definition/SHOW RESOURCES.md | 67 - .../Data Definition/TRUNCATE TABLE.md | 52 - .../Data Definition/create-function.md | 152 - .../Data Definition/drop-function.md | 54 - .../Data Definition/show-functions.md | 83 - .../sql-statements/Data Manipulation/BEGIN.md | 92 - .../Data Manipulation/BROKER LOAD.md | 587 - .../Data Manipulation/CANCEL DELETE.md | 36 - .../Data Manipulation/CANCEL LABEL.md | 53 - .../Data Manipulation/CANCEL LOAD.md | 45 - .../Data Manipulation/CREATE SYNC JOB.md | 165 - .../Data Manipulation/DELETE.md | 66 - .../Data Manipulation/EXPORT.md | 125 - .../Data Manipulation/GET LABEL STATE.md | 58 - .../Data Manipulation/GROUP BY.md | 168 - .../sql-statements/Data Manipulation/LOAD.md | 291 - .../Data Manipulation/MINI LOAD.md | 132 - .../Data Manipulation/MULTI LOAD.md | 107 - .../Data Manipulation/OUTFILE.md | 207 - .../Data Manipulation/PAUSE ROUTINE LOAD.md | 40 - .../Data Manipulation/PAUSE SYNC JOB.md | 48 - .../Data Manipulation/RESTORE TABLET.md | 41 - .../Data Manipulation/RESUME ROUTINE LOAD.md | 40 - .../Data Manipulation/RESUME SYNC JOB.md | 46 - .../Data Manipulation/ROUTINE LOAD.md | 588 - .../Data Manipulation/SHOW ALTER.md | 55 - .../Data Manipulation/SHOW BACKUP.md | 62 - .../Data Manipulation/SHOW CREATE FUNCTION.md | 43 - .../SHOW CREATE ROUTINE LOAD.md | 45 - .../Data Manipulation/SHOW DATA SKEW.md | 50 - .../Data Manipulation/SHOW DATA.md | 110 - .../Data Manipulation/SHOW DATABASE ID.md | 38 - .../Data Manipulation/SHOW DATABASES.md | 35 - .../Data Manipulation/SHOW DELETE.md | 
39 - .../SHOW DYNAMIC PARTITION TABLES.md | 36 - .../Data Manipulation/SHOW EXPORT.md | 70 - .../Data Manipulation/SHOW LOAD.md | 74 - .../Data Manipulation/SHOW PARTITION ID.md | 38 - .../Data Manipulation/SHOW PARTITIONS.md | 48 - .../Data Manipulation/SHOW PROPERTY.md | 42 - .../Data Manipulation/SHOW REPOSITORIES.md | 49 - .../Data Manipulation/SHOW RESTORE.md | 67 - .../SHOW ROUTINE LOAD TASK.md | 35 - .../Data Manipulation/SHOW ROUTINE LOAD.md | 107 - .../Data Manipulation/SHOW SNAPSHOT.md | 56 - .../Data Manipulation/SHOW STREAM LOAD.md | 68 - .../Data Manipulation/SHOW SYNC JOB.md | 49 - .../Data Manipulation/SHOW TABLE CREATION.md | 82 - .../Data Manipulation/SHOW TABLE ID.md | 38 - .../Data Manipulation/SHOW TABLES.md | 34 - .../Data Manipulation/SHOW TABLET.md | 39 - .../Data Manipulation/SHOW TABLETS.md | 56 - .../Data Manipulation/SHOW TRANSACTION.md | 100 - .../Data Manipulation/SHOW-LAST-INSERT.md | 67 - .../Data Manipulation/STOP ROUTINE LOAD.md | 35 - .../Data Manipulation/STOP SYNC JOB.md | 44 - .../Data Manipulation/STREAM LOAD.md | 286 - .../Data Manipulation/UPDATE.md | 75 - .../Data Manipulation/alter-routine-load.md | 115 - .../Data Manipulation/insert.md | 110 - .../Data Manipulation/lateral-view.md | 94 - .../sql-statements/Utility/util_stmt.md | 39 - .../en/summary/basic-summary.md | 0 .../en/summary/system-architecture.md | 4 +- docs/zh-CN/README.md | 2 +- .../cluster-management/elastic-expansion.md | 0 .../cluster-management/load-balancing.md | 0 .../cluster-management/upgrade.md | 0 .../zh-CN/admin-manual/config/be-config.md | 0 .../zh-CN/admin-manual/config/fe-config.md | 0 .../admin-manual/config/user-property.md | 0 .../zh-CN/admin-manual/data-admin/backup.md | 0 .../admin-manual/data-admin/delete-recover.md | 0 .../zh-CN/admin-manual/data-admin/restore.md | 0 .../http-actions/cancel-label.md | 0 .../http-actions/check-reset-rpc-cache.md | 0 .../http-actions/compaction-action.md | 0 .../http-actions/connection-action.md | 0 .../http-actions/fe-get-log-file.md | 0 .../http-actions/fe/backends-action.md | 0 .../http-actions/fe/bootstrap-action.md | 0 .../http-actions/fe/cancel-load-action.md | 0 .../fe/check-decommission-action.md | 0 .../fe/check-storage-type-action.md | 0 .../http-actions/fe/config-action.md | 0 .../http-actions/fe/connection-action.md | 0 .../http-actions/fe/get-ddl-stmt-action.md | 0 .../http-actions/fe/get-load-info-action.md | 0 .../http-actions/fe/get-load-state.md | 0 .../http-actions/fe/get-log-file-action.md | 0 .../http-actions/fe/get-small-file.md | 0 .../http-actions/fe/ha-action.md | 0 .../http-actions/fe/hardware-info-action.md | 0 .../http-actions/fe/health-action.md | 0 .../http-actions/fe/log-action.md | 0 .../http-actions/fe/logout-action.md | 0 .../http-actions/fe/manager/cluster-action.md | 0 .../http-actions/fe/manager/node-action.md | 0 .../fe/manager/query-profile-action.md | 0 .../http-actions/fe/meta-action.md | 0 .../http-actions/fe/meta-info-action.md | 0 .../fe/meta-replay-state-action.md | 0 .../http-actions/fe/profile-action.md | 0 .../http-actions/fe/query-detail-action.md | 0 .../http-actions/fe/query-profile-action.md | 0 .../http-actions/fe/row-count-action.md | 0 .../http-actions/fe/session-action.md | 0 .../http-actions/fe/set-config-action.md | 0 .../http-actions/fe/show-data-action.md | 0 .../http-actions/fe/show-meta-info-action.md | 0 .../http-actions/fe/show-proc-action.md | 0 .../fe/show-runtime-info-action.md | 0 .../fe/statement-execution-action.md | 0 .../http-actions/fe/system-action.md | 0 
.../fe/table-query-plan-action.md | 0 .../http-actions/fe/table-row-count-action.md | 0 .../http-actions/fe/table-schema-action.md | 0 .../http-actions/fe/upload-action.md | 0 .../http-actions/get-load-state.md | 0 .../http-actions/get-tablets.md | 0 .../http-actions/profile-action.md | 0 .../http-actions/query-detail-action.md | 0 .../http-actions/restore-tablet.md | 0 .../http-actions/show-data-action.md | 0 .../http-actions/tablet-migration-action.md | 0 .../http-actions/tablets_distribution.md | 0 .../maint-monitor/be-olap-error-code.md | 0 .../maint-monitor/disk-capacity.md | 0 .../maint-monitor/doris-error-code.md | 0 .../maint-monitor/metadata-operation.md | 0 .../maint-monitor}/monitor-alert.md | 0 .../monitor-metrics/be-metrics.md | 0 .../monitor-metrics/fe-metrics.md | 0 .../maint-monitor}/tablet-meta-tool.md | 0 .../tablet-repair-and-balance.md | 0 .../maint-monitor}/tablet-restore-tool.md | 0 .../zh-CN/admin-manual/multi-tenant.md | 0 .../zh-CN/admin-manual/optimization.md | 0 .../zh-CN/admin-manual/privilege-ldap/ldap.md | 0 .../privilege-ldap/user-privilege.md | 0 .../query-profile.md} | 7 +- .../zh-CN/admin-manual/sql-interception.md | 0 .../alter-table/alter-table-bitmap-index.md | 82 - .../alter-table/alter-table-replace-table.md | 73 - .../alter-table/alter-table-rollup.md | 194 - .../alter-table/alter-table-schema-change.md | 249 - .../alter-table/alter-table-temp-partition.md | 298 - .../administrator-guide/backup-restore.md | 193 - .../block-rule/sql-block.md | 93 - docs/zh-CN/administrator-guide/bloomfilter.md | 133 - docs/zh-CN/administrator-guide/broker.md | 282 - .../bucket-shuffle-join.md | 106 - .../administrator-guide/colocation-join.md | 409 - .../administrator-guide/config/be_config.md | 1543 - .../administrator-guide/config/fe_config.md | 2234 -- .../config/user_property.md | 73 - .../administrator-guide/dynamic-partition.md | 460 - .../administrator-guide/export-manual.md | 202 - .../export_with_mysql_dump.md | 41 - docs/zh-CN/administrator-guide/ldap.md | 177 - .../load-data/batch-delete-manual.md | 205 - .../load-data/binlog-load-manual.md | 502 - .../load-data/broker-load-manual.md | 544 - .../load-data/delete-manual.md | 189 - .../load-data/insert-into-manual.md | 310 - .../load-data/load-json-format.md | 470 - .../load-data/load-manual.md | 227 - .../load-data/routine-load-manual.md | 335 - .../load-data/s3-load-manual.md | 94 - .../load-data/sequence-column-manual.md | 208 - .../load-data/spark-load-manual.md | 596 - .../load-data/stream-load-manual.md | 415 - .../administrator-guide/materialized_view.md | 488 - .../zh-CN/administrator-guide/multi-tenant.md | 222 - .../operation/be-olap-error-code.md | 265 - .../operation/disk-capacity.md | 163 - .../operation/doris-error-code.md | 179 - .../operation/metadata-operation.md | 403 - .../operation/multi-tenant.md | 239 - .../operation/tablet-repair-and-balance.md | 775 - .../orthogonal-bitmap-manual.md | 161 - docs/zh-CN/administrator-guide/outfile.md | 192 - .../administrator-guide/partition_cache.md | 197 - docs/zh-CN/administrator-guide/privilege.md | 234 - docs/zh-CN/administrator-guide/query_cache.md | 155 - .../resource-management.md | 170 - .../administrator-guide/runtime-filter.md | 282 - .../administrator-guide/segment-v2-usage.md | 157 - .../administrator-guide/small-file-mgr.md | 104 - docs/zh-CN/administrator-guide/sql-mode.md | 76 - docs/zh-CN/administrator-guide/time-zone.md | 91 - docs/zh-CN/administrator-guide/update.md | 126 - docs/zh-CN/administrator-guide/variables.md | 492 - 
.../advanced/alter-table/replace-table.md | 0 .../advanced/alter-table/schema-change.md | 0 .../zh-CN/advanced/best-practice/debug-log.md | 0 .../advanced/best-practice/import-analysis.md | 0 .../advanced/best-practice/query-analysis.md | 0 {new-docs => docs}/zh-CN/advanced/broker.md | 0 .../zh-CN/advanced/cache/partition-cache.md | 0 .../join-optimization/bucket-shuffle-join.md | 0 .../join-optimization/colocation-join.md | 0 .../join-optimization/runtime-filter.md | 0 .../zh-CN/advanced/materialized-view.md | 0 .../advanced/orthogonal-bitmap-manual.md | 0 .../advanced/partition/dynamic-partition.md | 0 .../advanced/partition/table-tmp-partition.md | 0 {new-docs => docs}/zh-CN/advanced/resource.md | 0 .../zh-CN/advanced/small-file-mgr.md | 0 .../zh-CN/advanced/time-zone.md | 0 .../zh-CN/advanced/variables.md | 0 .../vectorized-execution-engine.md | 0 docs/zh-CN/benchmark/samples.md | 57 - {new-docs => docs}/zh-CN/benchmark/ssb.md | 0 docs/zh-CN/benchmark/star-schema-benchmark.md | 182 - docs/zh-CN/benchmark/systemd.md | 31 - {new-docs => docs}/zh-CN/benchmark/tpc-h.md | 0 .../data-operate/export/export-manual.md | 0 .../export/export_with_mysql_dump.md | 0 .../zh-CN/data-operate/export/outfile.md | 0 .../import-scenes/external-storage-load.md | 0 .../import-scenes/external-table-load.md | 0 .../import/import-scenes/jdbc-load.md | 0 .../import/import-scenes/kafka-load.md | 0 .../import/import-scenes/load-atomicity.md | 0 .../import/import-scenes/load-data-convert.md | 0 .../import/import-scenes/load-strict-mode.md | 0 .../import/import-scenes/local-file-load.md | 0 .../import/import-way/binlog-load-manual.md | 0 .../import/import-way/broker-load-manual.md | 0 .../import/import-way/insert-into-manual.md | 0 .../import/import-way/load-json-format.md | 0 .../import/import-way/routine-load-manual.md | 0 .../import/import-way/s3-load-manual.md | 0 .../import/import-way/spark-load-manual.md | 0 .../import/import-way/stream-load-manual.md | 0 .../zh-CN/data-operate/import/load-manual.md | 0 .../update-delete/batch-delete-manual.md | 0 .../update-delete/delete-manual.md | 0 .../update-delete/sequence-column-manual.md | 0 .../data-operate/update-delete/update.md | 0 .../zh-CN/data-table/advance-usage.md | 0 .../zh-CN/data-table/basic-usage.md | 0 .../zh-CN/data-table/best-practice.md | 0 .../zh-CN/data-table/data-model.md | 0 .../zh-CN/data-table/data-partition.md | 0 .../zh-CN/data-table/hit-the-rollup.md | 0 .../zh-CN/data-table/index/bitmap-index.md | 0 .../zh-CN/data-table/index/bloomfilter.md | 0 .../zh-CN/data-table/index/prefix-index.md | 0 .../zh-CN/ecosystem/audit-plugin.md | 0 {new-docs => docs}/zh-CN/ecosystem/datax.md | 0 .../doris-manager/cluster-managenent.md | 0 .../doris-manager/compiling-deploying.md | 0 .../doris-manager/initializing.md | 0 .../doris-manager/space-list.md | 0 .../doris-manager/space-management.md | 0 .../doris-manager/system-settings.md | 0 .../ecosystem/external-table/doris-on-es.md | 0 .../ecosystem/external-table/hive-of-doris.md | 0 .../external-table/iceberg-of-doris.md | 0 .../ecosystem/external-table/odbc-of-doris.md | 0 .../zh-CN/ecosystem/flink-doris-connector.md | 0 .../zh-CN/ecosystem/logstash.md | 0 .../ecosystem/plugin-development-manual.md | 0 .../zh-CN/ecosystem/seatunnel/flink-sink.md | 0 .../zh-CN/ecosystem/seatunnel/spark-sink.md | 0 .../zh-CN/ecosystem/spark-doris-connector.md | 0 .../zh-CN/ecosystem/udf/contribute-udf.md | 0 .../udf/native-user-defined-function.md | 0 .../udf/remote-user-defined-function.md | 0 
docs/zh-CN/extending-doris/audit-plugin.md | 119 - docs/zh-CN/extending-doris/datax.md | 104 - docs/zh-CN/extending-doris/doris-on-es.md | 588 - .../extending-doris/flink-doris-connector.md | 497 - docs/zh-CN/extending-doris/hive-bitmap-udf.md | 104 - docs/zh-CN/extending-doris/hive-of-doris.md | 117 - .../zh-CN/extending-doris/iceberg-of-doris.md | 210 - docs/zh-CN/extending-doris/logstash.md | 198 - docs/zh-CN/extending-doris/odbc-of-doris.md | 361 - .../plugin-development-manual.md | 312 - .../extending-doris/seatunnel/flink-sink.md | 116 - .../extending-doris/seatunnel/spark-sink.md | 124 - .../extending-doris/spark-doris-connector.md | 291 - .../extending-doris/udf/contribute-udf.md | 124 - .../udf/java-user-defined-function.md | 88 - .../udf/native-user-defined-function.md | 267 - .../udf/remote-user-defined-function.md | 110 - {new-docs => docs}/zh-CN/faq/data-faq.md | 0 docs/zh-CN/faq/error.md | 151 - docs/zh-CN/faq/faq.md | 297 - {new-docs => docs}/zh-CN/faq/install-faq.md | 0 {new-docs => docs}/zh-CN/faq/sql-faq.md | 0 .../zh-CN/get-starting/get-starting.md | 4 +- docs/zh-CN/getting-started/advance-usage.md | 280 - docs/zh-CN/getting-started/basic-usage.md | 381 - docs/zh-CN/getting-started/best-practice.md | 197 - .../getting-started/data-model-rollup.md | 638 - docs/zh-CN/getting-started/data-partition.md | 401 - docs/zh-CN/getting-started/hit-the-rollup.md | 296 - .../zh-CN/install/install-deploy.md | 0 .../install/source-install/compilation-arm.md | 0 .../compilation-with-ldb-toolchain.md | 0 .../install/source-install/compilation.md | 0 docs/zh-CN/installing/compilation-arm.md | 256 - .../compilation-with-ldb-toolchain.md | 127 - docs/zh-CN/installing/compilation.md | 261 - docs/zh-CN/installing/install-deploy.md | 475 - docs/zh-CN/installing/upgrade.md | 86 - .../internal/doris_storage_optimization.md | 234 - .../internal/flink_doris_connector_design.md | 272 - docs/zh-CN/internal/grouping_sets_design.md | 517 - docs/zh-CN/internal/metadata-design.md | 126 - docs/zh-CN/internal/spark_load.md | 212 - .../approx_count_distinct.md | 0 .../sql-functions/aggregate-functions/avg.md | 0 .../aggregate-functions/bitmap_union.md | 0 .../aggregate-functions/count.md | 0 .../aggregate-functions/group_concat.md | 0 .../aggregate-functions/hll_union_agg.md | 0 .../sql-functions/aggregate-functions/max.md | 0 .../aggregate-functions/max_by.md | 0 .../sql-functions/aggregate-functions/min.md | 0 .../aggregate-functions/min_by.md | 0 .../aggregate-functions/percentile.md | 0 .../aggregate-functions/percentile_approx.md | 0 .../aggregate-functions/stddev.md | 0 .../aggregate-functions/stddev_samp.md | 0 .../sql-functions/aggregate-functions/sum.md | 0 .../sql-functions/aggregate-functions/topn.md | 0 .../aggregate-functions/var_samp.md | 0 .../aggregate-functions/variance.md | 0 .../bitmap-functions/bitmap_and.md | 0 .../bitmap-functions/bitmap_and_count.md | 0 .../bitmap-functions/bitmap_and_not.md | 0 .../bitmap-functions/bitmap_and_not_count.md | 0 .../bitmap-functions/bitmap_contains.md | 0 .../bitmap-functions/bitmap_empty.md | 0 .../bitmap-functions/bitmap_from_string.md | 0 .../bitmap-functions/bitmap_has_all.md | 0 .../bitmap-functions/bitmap_has_any.md | 0 .../bitmap-functions/bitmap_hash.md | 0 .../bitmap-functions/bitmap_intersect.md | 0 .../bitmap-functions/bitmap_max.md | 0 .../bitmap-functions/bitmap_min.md | 0 .../bitmap-functions/bitmap_not.md | 0 .../bitmap-functions/bitmap_or.md | 0 .../bitmap-functions/bitmap_or_count.md | 0 .../bitmap_subset_in_range.md | 0 
.../bitmap-functions/bitmap_subset_limit.md | 0 .../bitmap-functions/bitmap_to_string.md | 0 .../bitmap-functions/bitmap_union.md | 0 .../bitmap-functions/bitmap_xor.md | 0 .../bitmap-functions/bitmap_xor_count.md | 0 .../orthogonal_bitmap_intersect.md | 0 .../orthogonal_bitmap_intersect_count.md | 0 .../orthogonal_bitmap_union_count.md | 0 .../bitmap-functions/sub_bitmap.md | 0 .../bitmap-functions/to_bitmap.md | 0 .../sql-functions/bitwise-functions/bitand.md | 0 .../sql-functions/bitwise-functions/bitnot.md | 0 .../sql-functions/bitwise-functions/bitor.md | 0 .../sql-functions/bitwise-functions/bitxor.md | 0 .../sql-functions/cast.md | 0 .../conditional-functions/case.md | 0 .../conditional-functions/coalesce.md | 0 .../sql-functions/conditional-functions/if.md | 0 .../conditional-functions/ifnull.md | 0 .../conditional-functions/nullif.md | 0 .../date-time-functions/convert_tz.md | 0 .../date-time-functions/curdate.md | 0 .../date-time-functions/current_timestamp.md | 0 .../date-time-functions/curtime.md | 0 .../date-time-functions/date_add.md | 0 .../date-time-functions/date_format.md | 0 .../date-time-functions/date_sub.md | 0 .../date-time-functions/datediff.md | 0 .../sql-functions/date-time-functions/day.md | 0 .../date-time-functions/dayname.md | 0 .../date-time-functions/dayofmonth.md | 0 .../date-time-functions/dayofweek.md | 0 .../date-time-functions/dayofyear.md | 0 .../date-time-functions/from_days.md | 0 .../date-time-functions/from_unixtime.md | 0 .../sql-functions/date-time-functions/hour.md | 0 .../date-time-functions/makedate.md | 0 .../date-time-functions/minute.md | 0 .../date-time-functions/month.md | 0 .../date-time-functions/monthname.md | 0 .../sql-functions/date-time-functions/now.md | 0 .../date-time-functions/second.md | 0 .../date-time-functions/str_to_date.md | 0 .../date-time-functions/time_round.md | 0 .../date-time-functions/timediff.md | 0 .../date-time-functions/timestampadd.md | 0 .../date-time-functions/timestampdiff.md | 0 .../date-time-functions/to_date.md | 0 .../date-time-functions/to_days.md | 0 .../date-time-functions/unix_timestamp.md | 0 .../date-time-functions/utc_timestamp.md | 0 .../sql-functions/date-time-functions/week.md | 0 .../date-time-functions/weekday.md | 0 .../date-time-functions/weekofyear.md | 0 .../sql-functions/date-time-functions/year.md | 0 .../date-time-functions/yearweek.md | 0 .../sql-functions/digital-masking.md | 0 .../encrypt-digest-functions/aes.md | 0 .../encrypt-digest-functions/md5.md | 0 .../encrypt-digest-functions/md5sum.md | 0 .../encrypt-digest-functions/sm3.md | 0 .../encrypt-digest-functions/sm3sum.md | 0 .../encrypt-digest-functions/sm4.md | 0 .../encrypt-dixgest-functions}/aes.md | 0 .../encrypt-dixgest-functions}/md5.md | 0 .../encrypt-dixgest-functions}/md5sum.md | 0 .../encrypt-dixgest-functions}/sm3.md | 0 .../encrypt-dixgest-functions}/sm3sum.md | 0 .../encrypt-dixgest-functions}/sm4.md | 0 .../hash-functions/murmur_hash3_32.md | 0 .../json-functions/get_json_double.md | 0 .../json-functions/get_json_int.md | 0 .../json-functions/get_json_string.md | 0 .../json-functions/json_array.md | 0 .../json-functions/json_object.md | 0 .../json-functions/json_quote.md | 0 .../sql-functions/math-functions/conv.md | 0 .../sql-functions/math-functions/pmod.md | 0 .../spatial-functions/st_astext.md | 0 .../spatial-functions/st_circle.md | 0 .../spatial-functions/st_contains.md | 0 .../spatial-functions/st_distance_sphere.md | 0 .../spatial-functions/st_geometryfromtext.md | 0 .../spatial-functions/st_linefromtext.md 
| 0 .../spatial-functions/st_point.md | 0 .../spatial-functions/st_polygon.md | 0 .../sql-functions/spatial-functions/st_x.md | 0 .../sql-functions/spatial-functions/st_y.md | 0 .../append_trailing_char_if_absent.md | 0 .../sql-functions/string-functions/ascii.md | 0 .../string-functions/bit_length.md | 0 .../string-functions/char_length.md | 0 .../sql-functions/string-functions/concat.md | 0 .../string-functions/concat_ws.md | 0 .../string-functions/ends_with.md | 0 .../string-functions/find_in_set.md | 0 .../sql-functions/string-functions/hex.md | 0 .../sql-functions/string-functions/instr.md | 0 .../sql-functions/string-functions/lcase.md | 0 .../sql-functions/string-functions/left.md | 0 .../sql-functions/string-functions/length.md | 0 .../string-functions/like/like.md | 0 .../string-functions/like/not_like.md | 0 .../sql-functions/string-functions/locate.md | 0 .../sql-functions/string-functions/lower.md | 0 .../sql-functions/string-functions/lpad.md | 0 .../sql-functions/string-functions/ltrim.md | 0 .../string-functions/money_format.md | 0 .../string-functions/null_or_empty.md | 0 .../string-functions/regexp/not_regexp.md | 0 .../string-functions/regexp/regexp.md | 0 .../string-functions/regexp/regexp_extract.md | 0 .../string-functions/regexp/regexp_replace.md | 0 .../sql-functions/string-functions/repeat.md | 0 .../sql-functions/string-functions/replace.md | 0 .../sql-functions/string-functions/reverse.md | 0 .../sql-functions/string-functions/right.md | 0 .../sql-functions/string-functions/rpad.md | 0 .../string-functions/split_part.md | 0 .../string-functions/starts_with.md | 0 .../sql-functions/string-functions/strleft.md | 0 .../string-functions/strright.md | 0 .../string-functions/substring.md | 0 .../sql-functions/string-functions/unhex.md | 0 .../table-functions/explode-bitmap.md | 0 .../table-functions/explode-json-array.md | 0 .../table-functions/explode-numbers.md | 0 .../table-functions/explode-split.md | 0 .../table-functions/outer-combinator.md | 0 .../sql-functions/window-function.md | 0 .../CREATE-ROLE.md | 0 .../CREATE-USER.md | 0 .../DROP-ROLE.md | 0 .../DROP-USER.md | 0 .../Account-Management-Statements/GRANT.md | 0 .../Account-Management-Statements/LDAP.md | 0 .../Account-Management-Statements/REVOKE.md | 0 .../SET-PASSWORD.md | 0 .../SET-PROPERTY.md | 0 .../ALTER-SYSTEM-ADD-BACKEND.md | 0 .../ALTER-SYSTEM-ADD-BROKER.md | 0 .../ALTER-SYSTEM-ADD-FOLLOWER.md | 0 .../ALTER-SYSTEM-ADD-OBSERVER.md | 0 .../ALTER-SYSTEM-DECOMMISSION-BACKEND.md | 0 .../ALTER-SYSTEM-DROP-BACKEND.md | 0 .../ALTER-SYSTEM-DROP-BROKER.md | 0 .../ALTER-SYSTEM-DROP-FOLLOWER.md | 0 .../ALTER-SYSTEM-DROP-OBSERVER.md | 0 .../ALTER-SYSTEM-MODIFY-BACKEND.md | 0 .../ALTER-SYSTEM-MODIFY-BROKER.md | 0 .../CANCEL-ALTER-SYSTEM.md | 0 .../Alter/ALTER-DATABASE.md | 0 .../Alter/ALTER-SQL-BLOCK-RULE.md | 0 .../Alter/ALTER-TABLE-BITMAP.md | 0 .../Alter/ALTER-TABLE-COLUMN.md | 0 .../Alter/ALTER-TABLE-PARTITION.md | 0 .../Alter/ALTER-TABLE-PROPERTY.md | 0 .../Alter/ALTER-TABLE-RENAME.md | 0 .../Alter/ALTER-TABLE-REPLACE.md | 0 .../Alter/ALTER-TABLE-ROLLUP.md | 0 .../Alter/ALTER-VIEW.md | 0 .../Alter/CANCEL-ALTER-TABLE.md | 0 .../Backup-and-Restore/BACKUP.md | 0 .../Backup-and-Restore/CANCEL-BACKUP.md | 0 .../Backup-and-Restore/CANCEL-RESTORE.md | 0 .../Backup-and-Restore/CREATE-REPOSITORY.md | 0 .../Backup-and-Restore/DROP-REPOSITORY.md | 0 .../Backup-and-Restore/RECOVER.md | 0 .../Backup-and-Restore/RESTORE.md | 0 .../Create/CREATE-DATABASE.md | 0 .../Create/CREATE-ENCRYPT-KEY.md | 0 
.../Create/CREATE-EXTERNAL-TABLE.md | 0 .../Create/CREATE-FILE.md | 0 .../Create/CREATE-FUNCTION.md | 0 .../Create/CREATE-INDEX.md | 0 .../Create/CREATE-MATERIALIZED-VIEW.md | 0 .../Create/CREATE-RESOURCE.md | 0 .../Create/CREATE-SQL-BLOCK-RULE.md | 0 .../Create/CREATE-TABLE-LIKE.md | 0 .../Create/CREATE-TABLE.md | 0 .../Create/CREATE-VIEW.md | 0 .../Drop/DROP-DATABASE.md | 0 .../Drop/DROP-ENCRYPT-KEY.md | 0 .../Drop/DROP-FILE.md | 0 .../Drop/DROP-FUNCTION.md | 0 .../Drop/DROP-INDEX.md | 0 .../Drop/DROP-MATERIALIZED-VIEW.md | 0 .../Drop/DROP-RESOURCE.md | 0 .../Drop/DROP-SQL-BLOCK-RULE.md | 0 .../Drop/DROP-TABLE.md | 0 .../Drop/TRUNCATE-TABLE.md | 0 .../Load/ALTER-ROUTINE-LOAD.md | 0 .../Load/BROKER-LOAD.md | 0 .../Load/CANCEL-LOAD.md | 0 .../Load/CREATE-ROUTINE-LOAD.md | 0 .../Load/CREATE-SYNC-JOB.md | 0 .../Load/MULTI-LOAD.md | 0 .../Load/PAUSE-ROUTINE-LOAD.md | 0 .../Load/PAUSE-SYNC-JOB.md | 0 .../Load/RESUME-ROUTINE-LOAD.md | 0 .../Load/RESUME-SYNC-JOB.md | 0 .../Load/STOP-ROUTINE-LOAD.md | 0 .../Load/STOP-SYNC-JOB.md | 0 .../Load/STREAM-LOAD.md | 0 .../Manipulation/DELETE.md | 0 .../Manipulation/INSERT.md | 0 .../Manipulation/UPDATE.md | 0 .../Data-Manipulation-Statements/OUTFILE.md | 0 .../sql-reference-v2/Data-Types}/BIGINT.md | 0 .../sql-reference-v2/Data-Types}/BITMAP.md | 0 .../sql-reference-v2/Data-Types}/BOOLEAN.md | 0 .../sql-reference-v2/Data-Types}/CHAR.md | 0 .../sql-reference-v2/Data-Types}/DATE.md | 0 .../sql-reference-v2/Data-Types}/DATETIME.md | 0 .../sql-reference-v2/Data-Types}/DECIMAL.md | 0 .../sql-reference-v2/Data-Types}/DOUBLE.md | 0 .../sql-reference-v2/Data-Types}/FLOAT.md | 0 .../sql-reference-v2/Data-Types}/HLL.md | 0 .../sql-reference-v2/Data-Types}/INT.md | 0 .../sql-reference-v2/Data-Types}/LARGEINT.md | 0 .../Data-Types}/QUANTILE_STATE.md | 0 .../sql-reference-v2/Data-Types}/SMALLINT.md | 0 .../sql-reference-v2/Data-Types}/STRING.md | 0 .../sql-reference-v2/Data-Types}/TINYINT.md | 0 .../sql-reference-v2/Data-Types}/VARCHAR.md | 0 .../ADMIN-CANCEL-REPAIR.md | 0 .../ADMIN-CHECK-TABLET.md | 0 .../ADMIN-CLEAN-TRASH.md | 0 .../ADMIN-REPAIR-TABLE.md | 0 .../ADMIN-SET-CONFIG.md | 0 .../ADMIN-SET-REPLICA-STATUS.md | 0 .../ADMIN-SHOW-CONFIG.md | 0 .../ADMIN-SHOW-REPLICA-DISTRIBUTION.md | 0 .../ADMIN-SHOW-REPLICA-STATUS.md | 0 .../ADMIN-SHOW-TABLET-STORAGE-FORMAT.md} | 0 .../ENABLE-FEATURE.md | 0 .../INSTALL-PLUGIN.md | 0 .../KILL.md | 0 .../RECOVER.md | 0 .../SET-VARIABLE.md | 0 .../UNINSTALL-PLUGIN.md | 0 .../SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md | 0 .../Show-Statements/SHOW-ALTER.md | 0 .../Show-Statements/SHOW-BACKENDS.md | 0 .../Show-Statements/SHOW-BACKUP.md | 0 .../Show-Statements/SHOW-BROKER.md | 0 .../Show-Statements/SHOW-CHARSET.md | 0 .../Show-Statements/SHOW-COLLATION.md | 0 .../Show-Statements/SHOW-COLUMNS.md | 0 .../Show-Statements/SHOW-CREATE-DATABASE.md | 0 .../Show-Statements/SHOW-CREATE-FUNCTION.md | 0 .../SHOW-CREATE-ROUTINE-LOAD.md | 0 .../Show-Statements/SHOW-CREATE-TABLE.md | 0 .../Show-Statements/SHOW-DATA.md | 0 .../Show-Statements/SHOW-DATABASE-ID.md | 0 .../Show-Statements/SHOW-DATABASES.md | 0 .../Show-Statements/SHOW-DELETE.md | 0 .../Show-Statements/SHOW-DYNAMIC-PARTITION.md | 0 .../Show-Statements/SHOW-ENCRYPT-KEY.md | 0 .../Show-Statements/SHOW-ENGINES.md | 0 .../Show-Statements/SHOW-EVENTS.md | 0 .../Show-Statements/SHOW-EXPORT.md | 0 .../Show-Statements/SHOW-FILE.md | 0 .../Show-Statements/SHOW-FRONTENDS.md | 0 .../Show-Statements/SHOW-FUNCTIONS.md | 0 .../Show-Statements/SHOW-GRANTS.md | 0 .../Show-Statements/SHOW-INDEX.md | 0 
.../Show-Statements/SHOW-LAST-INSERT.md | 0 .../Show-Statements/SHOW-LOAD-PROFILE.md | 0 .../Show-Statements/SHOW-LOAD-WARNINGS.md | 0 .../Show-Statements/SHOW-LOAD.md | 0 .../Show-Statements/SHOW-MIGRATIONS.md | 0 .../Show-Statements/SHOW-OPEN-TABLES.md | 0 .../Show-Statements/SHOW-PARTITION-ID.md | 0 .../Show-Statements/SHOW-PARTITIONS.md | 0 .../Show-Statements/SHOW-PLUGINS.md | 0 .../Show-Statements/SHOW-PROC.md | 0 .../Show-Statements/SHOW-PROCEDURE.md | 0 .../Show-Statements/SHOW-PROCESSLIST.md | 0 .../Show-Statements/SHOW-PROPERTY.md | 0 .../Show-Statements/SHOW-QUERY-PROFILE.md | 0 .../Show-Statements/SHOW-REPOSITORIES.md | 0 .../Show-Statements/SHOW-RESOURCES.md | 0 .../Show-Statements/SHOW-RESTORE.md | 0 .../Show-Statements/SHOW-ROLES.md | 0 .../Show-Statements/SHOW-ROLLUP.md | 0 .../Show-Statements/SHOW-ROUTINE-LOAD-TASK.md | 0 .../Show-Statements/SHOW-ROUTINE-LOAD.md | 0 .../Show-Statements/SHOW-SMALL-FILES.md | 0 .../Show-Statements/SHOW-SNAPSHOT.md | 0 .../Show-Statements/SHOW-SQL-BLOCK-RULE.md | 0 .../Show-Statements/SHOW-STATUS.md | 0 .../Show-Statements/SHOW-STREAM-LOAD.md | 0 .../Show-Statements/SHOW-SYNC-JOB.md | 0 .../Show-Statements/SHOW-TABLE-ID.md | 0 .../Show-Statements/SHOW-TABLE-STATUS.md | 0 .../Show-Statements/SHOW-TABLET.md | 0 .../Show-Statements/SHOW-TRANSACTION.md | 0 .../Show-Statements/SHOW-TRASH.md | 0 .../Show-Statements/SHOW-TRIGGERS.md | 0 .../Show-Statements/SHOW-USER.md | 0 .../Show-Statements/SHOW-VARIABLES.md | 0 .../Show-Statements/SHOW-VIEW.md | 0 .../Show-Statements/SHOW-WARNING.md | 0 .../Show-Statements/SHOW-WHITE-LIST.md | 0 .../Utility-Statements/DESCRIBE.md | 0 .../Utility-Statements/HELP.md | 0 .../Utility-Statements/USE.md | 0 .../ALTER-USER.md | 38 - .../CREATE-ROLE.md | 38 - .../CREATE-USER.md | 38 - .../DROP-ROLE.md | 38 - .../DROP-USER.md | 38 - .../Account-Management-Statements/GRANT.md | 38 - .../Account-Management-Statements/LDAP.md | 38 - .../Account-Management-Statements/REVOKE.md | 38 - .../SET-PASSWORD.md | 38 - .../SET-PROPERTY.md | 38 - .../ALTER-SYSTEM-ADD-BACKEND.md | 38 - .../ALTER-SYSTEM-ADD-FOLLOWER.md | 38 - .../ALTER-SYSTEM-ADD-OBSERVER.md | 38 - .../ALTER-SYSTEM-DECOMMISSION-BACKEND.md | 38 - .../ALTER-SYSTEM-DROP-BACKEND.md | 38 - .../ALTER-SYSTEM-DROP-FOLLOWER.md | 38 - .../ALTER-SYSTEM-DROP-OBSERVER.md | 38 - .../CANCEL-ALTER-SYSTEM.md | 38 - .../Alter/ALTER-DATABASE.md | 38 - .../Alter/ALTER-TABLE-COLUMN.md | 38 - .../Alter/ALTER-TABLE-PARTITION.md | 38 - .../Alter/ALTER-TABLE-PROPERTY.md | 38 - .../Alter/ALTER-TABLE-RENAME.md | 38 - .../Alter/ALTER-TABLE-REPLACE.md | 38 - .../Alter/ALTER-TABLE-ROLLUP.md | 38 - .../Alter/ALTER-VIEW.md | 38 - .../Alter/CANCEL-ALTER-TABLE.md | 38 - .../Backup-and-Restore/BACKUP.md | 38 - .../Backup-and-Restore/CANCEL-BACKUP.md | 38 - .../Backup-and-Restore/CANCEL-RESTORE.md | 38 - .../Backup-and-Restore/CREATE-REPOSITORY.md | 38 - .../Backup-and-Restore/DROP-REPOSITORY.md | 38 - .../Backup-and-Restore/RESTORE.md | 38 - .../Create/CREATE-DATABASE.md | 38 - .../Create/CREATE-ENCRYPT-KEY.md | 38 - .../Create/CREATE-FILE.md | 38 - .../Create/CREATE-FUNCTION.md | 38 - .../Create/CREATE-INDEX.md | 38 - .../Create/CREATE-MATERIALIZED-VIEW.md | 38 - .../Create/CREATE-RESOURCE.md | 38 - .../Create/CREATE-TABLE-LIKE.md | 38 - .../Create/CREATE-TABLE.md | 569 - .../Create/CREATE-VIEW.md | 38 - .../Drop/DROP-DATABASE.md | 38 - .../Drop/DROP-ENCRYPT-KEY.md | 38 - .../Drop/DROP-FILE.md | 38 - .../Drop/DROP-FUNCTION.md | 38 - .../Drop/DROP-INDEX.md | 38 - .../Drop/DROP-MATERIALIZED-VIEW.md | 38 - 
.../Drop/DROP-RESOURCE.md | 38 - .../Drop/DROP-TABLE.md | 38 - .../Drop/TRUNCATE-TABLE.md | 38 - .../Load/ALTER-ROUTINE-LOAD.md | 38 - .../Load/BROKER-LOAD.md | 38 - .../Load/CANCEL-LOAD.md | 38 - .../Load/CREATE-ROUTINE-LOAD.md | 38 - .../Load/PAUSE-ROUTINE-LOAD.md | 38 - .../Load/RESUME-ROUTINE-LOAD.md | 38 - .../Load/STOP-ROUTINE-LOAD.md | 38 - .../Load/STREAM-LOAD.md | 38 - .../Manipulation/DELETE.md | 38 - .../Manipulation/INSERT.md | 38 - .../Manipulation/UPDATE.md | 38 - .../ADMIN-CANCEL-REPAIR.md | 38 - .../ADMIN-CHECK-TABLET.md | 38 - .../ADMIN-REPAIR-TABLE.md | 38 - .../ADMIN-SET-CONFIG.md | 38 - .../ADMIN-SET-REPLICA-STATUS.md | 38 - .../ADMIN-SHOW-CONFIG.md | 38 - .../ADMIN-SHOW-REPLICA-DISTRIBUTION.md | 38 - .../ADMIN-SHOW-REPLICA-STATUS.md | 38 - .../INSTALL-PLUGIN.md | 38 - .../KILL.md | 38 - .../RECOVER.md | 38 - .../SET-VARIABLE.md | 38 - .../UNINSTALL-PLUGIN.md | 38 - .../Show-Statements/SHOW-ALTER.md | 38 - .../Show-Statements/SHOW-BACKENDS.md | 38 - .../Show-Statements/SHOW-BACKUP.md | 38 - .../Show-Statements/SHOW-BROKER.md | 38 - .../Show-Statements/SHOW-COLUMNS.md | 38 - .../Show-Statements/SHOW-CREATE-DATABASE.md | 38 - .../Show-Statements/SHOW-CREATE-FUNCTION.md | 38 - .../SHOW-CREATE-ROUTINE-LOAD.md | 38 - .../Show-Statements/SHOW-CREATE-TABLE.md | 38 - .../Show-Statements/SHOW-DATA.md | 38 - .../Show-Statements/SHOW-DATABASE-ID.md | 38 - .../Show-Statements/SHOW-DATABASES.md | 38 - .../Show-Statements/SHOW-DELETE.md | 38 - .../Show-Statements/SHOW-DYNAMIC-PARTITION.md | 38 - .../Show-Statements/SHOW-ENCRYPT-KEY.md | 38 - .../Show-Statements/SHOW-ENGINES.md | 38 - .../Show-Statements/SHOW-EVENTS.md | 38 - .../Show-Statements/SHOW-EXPORT.md | 38 - .../Show-Statements/SHOW-FRONTENDS.md | 38 - .../Show-Statements/SHOW-FUNCTIONS.md | 38 - .../Show-Statements/SHOW-GRANTS.md | 38 - .../Show-Statements/SHOW-INDEX.md | 38 - .../Show-Statements/SHOW-LOAD-PROFILE.md | 38 - .../Show-Statements/SHOW-LOAD-WARNINGS.md | 38 - .../Show-Statements/SHOW-LOAD.md | 38 - .../Show-Statements/SHOW-MIGRATIONS.md | 38 - .../Show-Statements/SHOW-OPEN-TABLES.md | 38 - .../Show-Statements/SHOW-PARTITION-ID.md | 38 - .../Show-Statements/SHOW-PARTITIONS.md | 38 - .../Show-Statements/SHOW-PLUGINS.md | 38 - .../Show-Statements/SHOW-PROC.md | 38 - .../Show-Statements/SHOW-PROCESSLIST.md | 38 - .../Show-Statements/SHOW-PROPERTY.md | 83 - .../Show-Statements/SHOW-REPOSITORIES.md | 38 - .../Show-Statements/SHOW-RESOURCES.md | 38 - .../Show-Statements/SHOW-RESTORE.md | 38 - .../Show-Statements/SHOW-ROLES.md | 38 - .../Show-Statements/SHOW-ROUTINE-LOAD-TASK.md | 38 - .../Show-Statements/SHOW-ROUTINE-LOAD.md | 38 - .../Show-Statements/SHOW-SMALL-FILES.md | 38 - .../Show-Statements/SHOW-SNAPSHOT.md | 38 - .../Show-Statements/SHOW-STATUS.md | 38 - .../Show-Statements/SHOW-STREAM-LOAD.md | 38 - .../Show-Statements/SHOW-TABLE-ID.md | 38 - .../Show-Statements/SHOW-TABLE-STATUS.md | 38 - .../Show-Statements/SHOW-TABLET.md | 38 - .../Show-Statements/SHOW-TRANSACTION.md | 38 - .../Show-Statements/SHOW-USER.md | 38 - .../Show-Statements/SHOW-VARIABLES.md | 38 - .../Show-Statements/SHOW-VIEW.md | 38 - .../Utility-Statements/DESCRIBE.md | 38 - .../sql-statements/Utility-Statements/HELP.md | 38 - .../sql-statements/Utility-Statements/USE.md | 38 - .../Account Management/CREATE ROLE.md | 45 - .../Account Management/CREATE USER.md | 76 - .../Account Management/DROP ROLE.md | 44 - .../Account Management/DROP USER.md | 50 - .../Account Management/GRANT.md | 110 - .../Account Management/REVOKE.md | 55 - .../Account 
Management/SET PASSWORD.md | 56 - .../Account Management/SET PROPERTY.md | 108 - .../Account Management/SHOW GRANTS.md | 57 - .../Account Management/SHOW ROLES.md | 42 - .../ADMIN CANCEL REBALANCE DISK.md | 52 - .../Administration/ADMIN CANCEL REPAIR.md | 48 - .../Administration/ADMIN CHECK TABLET.md | 57 - .../Administration/ADMIN CLEAN TRASH.md | 47 - .../Administration/ADMIN COMPACT.md | 53 - .../Administration/ADMIN REBALANCE DISK.md | 54 - .../Administration/ADMIN REPAIR.md | 53 - .../Administration/ADMIN SET CONFIG.md | 44 - .../ADMIN SET REPLICA STATUS.md | 62 - .../Administration/ADMIN SHOW CONFIG.md | 63 - .../ADMIN SHOW REPLICA DISTRIBUTION.md | 52 - .../ADMIN SHOW REPLICA STATUS.md | 65 - .../Administration/ADMIN-DIAGNOSE-TABLET.md | 59 - .../Administration/ALTER CLUSTER.md | 54 - .../Administration/ALTER SYSTEM.md | 140 - .../Administration/CANCEL DECOMMISSION.md | 41 - .../Administration/CREATE CLUSTER.md | 62 - .../Administration/CREATE FILE.md | 77 - .../Administration/DROP CLUSTER.md | 44 - .../Administration/DROP FILE.md | 51 - .../sql-statements/Administration/ENTER.md | 44 - .../Administration/INSTALL PLUGIN.md | 63 - .../Administration/LINK DATABASE.md | 51 - .../Administration/MIGRATE DATABASE.md | 47 - .../Administration/SET LDAP_ADMIN_PASSWORD.md | 45 - .../Administration/SHOW BACKENDS.md | 49 - .../Administration/SHOW BROKER.md | 41 - .../Administration/SHOW FILE.md | 53 - .../Administration/SHOW FRONTENDS.md | 44 - .../Administration/SHOW FULL COLUMNS.md | 40 - .../Administration/SHOW INDEX.md | 44 - .../Administration/SHOW MIGRATIONS.md | 38 - .../Administration/SHOW PLUGINS.md | 45 - .../Administration/SHOW TABLE STATUS.md | 48 - .../Administration/SHOW TRASH.md | 49 - .../Administration/SHOW VIEW.md | 39 - .../Administration/UNINSTALL PLUGIN.md | 47 - .../Data Definition/ALTER DATABASE.md | 62 - .../Data Definition/ALTER RESOURCE.md | 48 - .../Data Definition/ALTER TABLE.md | 441 - .../Data Definition/ALTER VIEW.md | 52 - .../sql-statements/Data Definition/BACKUP.md | 71 - .../Data Definition/CANCEL ALTER.md | 69 - .../Data Definition/CANCEL BACKUP.md | 39 - .../Data Definition/CANCEL RESTORE.md | 42 - .../Data Definition/CREATE DATABASE.md | 69 - .../Data Definition/CREATE ENCRYPTKEY.md | 80 - .../Data Definition/CREATE INDEX.md | 45 - .../CREATE MATERIALIZED VIEW.md | 237 - .../Data Definition/CREATE REPOSITORY.md | 88 - .../Data Definition/CREATE RESOURCE.md | 132 - .../Data Definition/CREATE TABLE LIKE.md | 78 - .../Data Definition/CREATE TABLE.md | 912 - .../Data Definition/CREATE VIEW.md | 64 - .../Data Definition/DROP DATABASE.md | 42 - .../Data Definition/DROP ENCRYPTKEY.md | 55 - .../Data Definition/DROP INDEX.md | 37 - .../Data Definition/DROP MATERIALIZED VIEW.md | 108 - .../Data Definition/DROP REPOSITORY.md | 42 - .../Data Definition/DROP RESOURCE.md | 44 - .../Data Definition/DROP TABLE.md | 46 - .../Data Definition/DROP VIEW.md | 40 - .../sql-statements/Data Definition/HLL.md | 109 - .../sql-statements/Data Definition/RECOVER.md | 54 - .../Data Definition/REFRESH DATABASE.md | 46 - .../Data Definition/REFRESH TABLE.md | 46 - .../sql-statements/Data Definition/RESTORE.md | 88 - .../Data Definition/SHOW ENCRYPTKEYS.md | 68 - .../Data Definition/SHOW RESOURCES.md | 66 - .../Data Definition/TRUNCATE TABLE.md | 53 - .../Data Definition/create-function.md | 153 - .../Data Definition/drop-function.md | 56 - .../Data Definition/show-functions.md | 85 - .../sql-statements/Data Manipulation/BEGIN.md | 93 - .../Data Manipulation/BROKER LOAD.md | 603 - 
.../Data Manipulation/CANCEL LOAD.md | 46 - .../Data Manipulation/CREATE SYNC JOB.md | 166 - .../Data Manipulation/DELETE.md | 66 - .../Data Manipulation/EXPORT.md | 126 - .../Data Manipulation/GROUP BY.md | 170 - .../sql-statements/Data Manipulation/LOAD.md | 298 - .../Data Manipulation/MINI LOAD.md | 139 - .../Data Manipulation/MULTI LOAD.md | 108 - .../Data Manipulation/PAUSE ROUTINE LOAD.md | 40 - .../Data Manipulation/PAUSE SYNC JOB.md | 43 - .../Data Manipulation/RESUME ROUTINE LOAD.md | 41 - .../Data Manipulation/RESUME SYNC JOB.md | 44 - .../Data Manipulation/ROUTINE LOAD.md | 544 - .../Data Manipulation/SHOW ALTER.md | 55 - .../Data Manipulation/SHOW BACKUP.md | 63 - .../Data Manipulation/SHOW CREATE FUNCTION.md | 43 - .../SHOW CREATE ROUTINE LOAD.md | 44 - .../Data Manipulation/SHOW DATA SKEW.md | 50 - .../Data Manipulation/SHOW DATA.md | 113 - .../Data Manipulation/SHOW DATABASE ID.md | 39 - .../Data Manipulation/SHOW DATABASES.md | 35 - .../Data Manipulation/SHOW DELETE.md | 39 - .../SHOW DYNAMIC PARTITION TABLES.md | 39 - .../Data Manipulation/SHOW EXPORT.md | 69 - .../Data Manipulation/SHOW LOAD.md | 75 - .../Data Manipulation/SHOW PARTITION ID.md | 39 - .../Data Manipulation/SHOW PARTITIONS.md | 49 - .../Data Manipulation/SHOW PROPERTY.md | 42 - .../Data Manipulation/SHOW REPOSITORIES.md | 49 - .../Data Manipulation/SHOW RESTORE.md | 67 - .../SHOW ROUTINE LOAD TASK.md | 35 - .../Data Manipulation/SHOW ROUTINE LOAD.md | 108 - .../Data Manipulation/SHOW SNAPSHOT.md | 57 - .../Data Manipulation/SHOW STREAM LOAD.md | 69 - .../Data Manipulation/SHOW SYNC JOB.md | 49 - .../Data Manipulation/SHOW TABLE CREATION.md | 82 - .../Data Manipulation/SHOW TABLE ID.md | 39 - .../Data Manipulation/SHOW TABLES.md | 35 - .../Data Manipulation/SHOW TABLET.md | 40 - .../Data Manipulation/SHOW TABLETS.md | 65 - .../Data Manipulation/SHOW TRANSACTION.md | 101 - .../Data Manipulation/SHOW-LAST-INSERT.md | 68 - .../Data Manipulation/SPARK LOAD.md | 265 - .../Data Manipulation/STOP ROUTINE LOAD.md | 36 - .../Data Manipulation/STOP SYNC JOB.md | 44 - .../Data Manipulation/STREAM LOAD.md | 226 - .../Data Manipulation/UPDATE.md | 75 - .../Data Manipulation/alter-routine-load.md | 119 - .../Data Manipulation/insert.md | 111 - .../Data Manipulation/lateral-view.md | 94 - .../sql-statements/Utility/DESCRIBE.md | 48 - .../zh-CN/summary/basic-summary.md | 4 +- .../zh-CN/summary/system-architecture.md | 5 +- new-docs/.markdownlint.yml | 19 - new-docs/.vuepress/components/CaseList.vue | 105 - new-docs/.vuepress/config.js | 130 - new-docs/.vuepress/sidebar/en.js | 947 - new-docs/.vuepress/sidebar/zh-CN.js | 947 - .../.vuepress/theme/components/Footer.vue | 89 - new-docs/.vuepress/theme/components/Home.vue | 454 - new-docs/.vuepress/theme/index.js | 21 - new-docs/.vuepress/theme/layouts/Article.vue | 71 - .../.vuepress/theme/layouts/ArticleList.vue | 141 - new-docs/.vuepress/theme/layouts/Layout.vue | 57 - new-docs/.vuepress/theme/styles/index.styl | 21 - new-docs/.vuepress/vuepress.textClipping | Bin 199 -> 0 bytes new-docs/README.md | 255 - new-docs/build_help_zip.sh | 44 - new-docs/en/README.md | 95 - .../admin-manual/http-actions/cancel-label.md | 64 - .../http-actions/check-reset-rpc-cache.md | 47 - .../http-actions/compaction-action.md | 211 - .../http-actions/connection-action.md | 42 - .../http-actions/fe-get-log-file.md | 74 - .../http-actions/fe/backends-action.md | 70 - .../http-actions/fe/bootstrap-action.md | 121 - .../http-actions/fe/cancel-load-action.md | 96 - .../fe/check-decommission-action.md 
| 84 - .../fe/check-storage-type-action.md | 84 - .../http-actions/fe/config-action.md | 83 - .../http-actions/fe/connection-action.md | 82 - .../http-actions/fe/get-ddl-stmt-action.md | 92 - .../http-actions/fe/get-load-info-action.md | 95 - .../http-actions/fe/get-load-state.md | 88 - .../http-actions/fe/get-log-file-action.md | 113 - .../http-actions/fe/get-small-file.md | 102 - .../admin-manual/http-actions/fe/ha-action.md | 95 - .../http-actions/fe/hardware-info-action.md | 79 - .../http-actions/fe/health-action.md | 61 - .../http-actions/fe/log-action.md | 98 - .../http-actions/fe/logout-action.md | 59 - .../http-actions/fe/manager/cluster-action.md | 77 - .../http-actions/fe/manager/node-action.md | 435 - .../fe/manager/query-profile-action.md | 308 - .../http-actions/fe/meta-action.md | 64 - .../http-actions/fe/meta-info-action.md | 232 - .../fe/meta-replay-state-action.md | 61 - .../http-actions/fe/profile-action.md | 80 - .../http-actions/fe/query-detail-action.md | 116 - .../http-actions/fe/query-profile-action.md | 108 - .../http-actions/fe/row-count-action.md | 84 - .../http-actions/fe/session-action.md | 75 - .../http-actions/fe/set-config-action.md | 150 - .../http-actions/fe/show-data-action.md | 111 - .../http-actions/fe/show-meta-info-action.md | 138 - .../http-actions/fe/show-proc-action.md | 104 - .../fe/show-runtime-info-action.md | 84 - .../fe/statement-execution-action.md | 102 - .../http-actions/fe/system-action.md | 93 - .../fe/table-query-plan-action.md | 113 - .../http-actions/fe/table-row-count-action.md | 88 - .../http-actions/fe/table-schema-action.md | 155 - .../http-actions/fe/upload-action.md | 159 - .../http-actions/get-load-state.md | 59 - .../admin-manual/http-actions/get-tablets.md | 65 - .../http-actions/profile-action.md | 120 - .../http-actions/query-detail-action.md | 61 - .../http-actions/restore-tablet.md | 41 - .../http-actions/show-data-action.md | 35 - .../http-actions/tablet-migration-action.md | 89 - .../http-actions/tablets_distribution.md | 124 - .../maint-monitor/doris-error-code.md | 178 - .../maint-monitor/monitor-alert.md | 309 - .../monitor-metrics/be-metrics.md | 84 - .../monitor-metrics/fe-metrics.md | 155 - .../maint-monitor/multi-tenant.md | 238 - .../maint-monitor/tablet-restore-tool.md | 136 - .../en/advanced/orthogonal-bitmap-manual.md | 159 - .../export/export_with_mysql_dump.md | 41 - new-docs/en/data-table/index/bloomfilter.md | 133 - new-docs/en/ecosystem/audit-plugin.md | 100 - .../en/ecosystem/plugin-development-manual.md | 314 - .../compilation-with-ldb-toolchain.md | 127 - .../approx_count_distinct.md | 49 - .../sql-functions/aggregate-functions/avg.md | 59 - .../aggregate-functions/bitmap_union.md | 146 - .../aggregate-functions/count.md | 61 - .../aggregate-functions/group_concat.md | 70 - .../aggregate-functions/hll_union_agg.md | 52 - .../sql-functions/aggregate-functions/max.md | 46 - .../sql-functions/aggregate-functions/min.md | 46 - .../aggregate-functions/percentile.md | 58 - .../aggregate-functions/percentile_approx.md | 56 - .../aggregate-functions/stddev.md | 53 - .../aggregate-functions/stddev_samp.md | 46 - .../sql-functions/aggregate-functions/sum.md | 46 - .../sql-functions/aggregate-functions/topn.md | 61 - .../aggregate-functions/var_samp.md | 46 - .../aggregate-functions/variance.md | 53 - .../bitmap-functions/bitmap_and.md | 83 - .../bitmap-functions/bitmap_and_count.md | 84 - .../bitmap-functions/bitmap_and_not.md | 48 - .../bitmap-functions/bitmap_and_not_count.md | 49 - 
.../bitmap-functions/bitmap_contains.md | 55 - .../bitmap-functions/bitmap_empty.md | 52 - .../bitmap-functions/bitmap_from_string.md | 63 - .../bitmap-functions/bitmap_has_all.md | 56 - .../bitmap-functions/bitmap_has_any.md | 55 - .../bitmap-functions/bitmap_hash.md | 52 - .../bitmap-functions/bitmap_intersect.md | 61 - .../bitmap-functions/bitmap_max.md | 55 - .../bitmap-functions/bitmap_min.md | 55 - .../bitmap-functions/bitmap_not.md | 55 - .../bitmap-functions/bitmap_or.md | 83 - .../bitmap-functions/bitmap_or_count.md | 77 - .../bitmap_subset_in_range.md | 57 - .../bitmap-functions/bitmap_subset_limit.md | 59 - .../bitmap-functions/bitmap_to_string.md | 70 - .../bitmap-functions/bitmap_union.md | 59 - .../bitmap-functions/bitmap_xor.md | 76 - .../bitmap-functions/bitmap_xor_count.md | 83 - .../orthogonal_bitmap_intersect.md | 47 - .../orthogonal_bitmap_intersect_count.md | 46 - .../orthogonal_bitmap_union_count.md | 46 - .../bitmap-functions/sub_bitmap.md | 61 - .../bitmap-functions/to_bitmap.md | 61 - .../sql-functions/bitwise-functions/bitand.md | 57 - .../sql-functions/bitwise-functions/bitnot.md | 57 - .../sql-functions/bitwise-functions/bitor.md | 57 - .../sql-functions/bitwise-functions/bitxor.md | 57 - new-docs/en/sql-manual/sql-functions/cast.md | 83 - .../conditional-functions/case.md | 72 - .../conditional-functions/coalesce.md | 47 - .../sql-functions/conditional-functions/if.md | 50 - .../conditional-functions/ifnull.md | 54 - .../conditional-functions/nullif.md | 61 - .../date-time-functions/convert_tz.md | 55 - .../date-time-functions/curdate.md | 54 - .../date-time-functions/current_timestamp.md | 47 - .../date-time-functions/curtime.md | 48 - .../date-time-functions/date_add.md | 53 - .../date-time-functions/date_format.md | 166 - .../date-time-functions/date_sub.md | 53 - .../date-time-functions/datediff.md | 59 - .../sql-functions/date-time-functions/day.md | 49 - .../date-time-functions/dayname.md | 49 - .../date-time-functions/dayofmonth.md | 50 - .../date-time-functions/dayofweek.md | 55 - .../date-time-functions/dayofyear.md | 50 - .../date-time-functions/from_days.md | 46 - .../date-time-functions/from_unixtime.md | 67 - .../sql-functions/date-time-functions/hour.md | 48 - .../date-time-functions/makedate.md | 45 - .../date-time-functions/minute.md | 48 - .../date-time-functions/month.md | 49 - .../date-time-functions/monthname.md | 49 - .../sql-functions/date-time-functions/now.md | 47 - .../date-time-functions/second.md | 48 - .../date-time-functions/str_to_date.md | 72 - .../date-time-functions/time_round.md | 86 - .../date-time-functions/timediff.md | 65 - .../date-time-functions/timestampadd.md | 58 - .../date-time-functions/timestampdiff.md | 67 - .../date-time-functions/to_date.md | 48 - .../date-time-functions/to_days.md | 50 - .../date-time-functions/unix_timestamp.md | 86 - .../date-time-functions/utc_timestamp.md | 50 - .../sql-functions/date-time-functions/week.md | 67 - .../date-time-functions/weekday.md | 66 - .../date-time-functions/weekofyear.md | 50 - .../sql-functions/date-time-functions/year.md | 50 - .../date-time-functions/yearweek.md | 79 - .../sql-functions/digital-masking.md | 56 - .../hash-functions/murmur_hash3_32.md | 61 - .../json-functions/get_json_double.md | 74 - .../json-functions/get_json_int.md | 74 - .../json-functions/get_json_string.md | 84 - .../json-functions/json_array.md | 70 - .../json-functions/json_object.md | 71 - .../json-functions/json_quote.md | 70 - .../sql-functions/math-functions/conv.md | 60 - 
.../sql-functions/math-functions/pmod.md | 54 - .../spatial-functions/st_astext.md | 47 - .../spatial-functions/st_circle.md | 48 - .../spatial-functions/st_contains.md | 56 - .../spatial-functions/st_distance_sphere.md | 47 - .../spatial-functions/st_geometryfromtext.md | 47 - .../spatial-functions/st_linefromtext.md | 47 - .../spatial-functions/st_point.md | 48 - .../spatial-functions/st_polygon.md | 48 - .../sql-functions/spatial-functions/st_x.md | 47 - .../sql-functions/spatial-functions/st_y.md | 47 - .../append_trailing_char_if_absent.md | 60 - .../sql-functions/string-functions/ascii.md | 54 - .../string-functions/bit_length.md | 54 - .../string-functions/char_length.md | 55 - .../sql-functions/string-functions/concat.md | 61 - .../string-functions/concat_ws.md | 63 - .../string-functions/ends_with.md | 54 - .../string-functions/find_in_set.md | 49 - .../sql-functions/string-functions/hex.md | 85 - .../sql-functions/string-functions/instr.md | 54 - .../sql-functions/string-functions/lcase.md | 37 - .../sql-functions/string-functions/left.md | 47 - .../sql-functions/string-functions/length.md | 54 - .../string-functions/like/like.md | 83 - .../string-functions/like/not_like.md | 85 - .../sql-functions/string-functions/locate.md | 61 - .../sql-functions/string-functions/lower.md | 47 - .../sql-functions/string-functions/lpad.md | 54 - .../sql-functions/string-functions/ltrim.md | 47 - .../string-functions/money_format.md | 61 - .../string-functions/null_or_empty.md | 60 - .../string-functions/regexp/not_regexp.md | 56 - .../string-functions/regexp/regexp.md | 56 - .../string-functions/regexp/regexp_extract.md | 54 - .../string-functions/regexp/regexp_replace.md | 54 - .../sql-functions/string-functions/repeat.md | 54 - .../sql-functions/string-functions/replace.md | 46 - .../sql-functions/string-functions/reverse.md | 56 - .../sql-functions/string-functions/right.md | 47 - .../sql-functions/string-functions/rpad.md | 54 - .../string-functions/split_part.md | 69 - .../string-functions/starts_with.md | 54 - .../sql-functions/string-functions/strleft.md | 47 - .../string-functions/strright.md | 48 - .../string-functions/substring.md | 77 - .../sql-functions/string-functions/unhex.md | 63 - .../table-functions/explode-bitmap.md | 157 - .../table-functions/explode-json-array.md | 286 - .../table-functions/explode-split.md | 112 - .../sql-functions/window-function.md | 487 - .../ALTER-SYSTEM-MODIFY-BROKER.md | 38 - .../sql-reference-v2/Data-Types/BIGINT.md | 33 - .../sql-reference-v2/Data-Types/BITMAP.md | 48 - .../sql-reference-v2/Data-Types/BOOLEAN.md | 33 - .../sql-reference-v2/Data-Types/CHAR.md | 33 - .../sql-reference-v2/Data-Types/DATE.md | 41 - .../sql-reference-v2/Data-Types/DATETIME.md | 34 - .../sql-reference-v2/Data-Types/DECIMAL.md | 34 - .../sql-reference-v2/Data-Types/DOUBLE.md | 33 - .../sql-reference-v2/Data-Types/FLOAT.md | 33 - .../sql-reference-v2/Data-Types/HLL.md | 49 - .../sql-reference-v2/Data-Types/INT.md | 33 - .../Data-Types/QUANTILE_STATE.md | 62 - .../sql-reference-v2/Data-Types/SMALLINT.md | 33 - .../sql-reference-v2/Data-Types/STRING.md | 35 - .../sql-reference-v2/Data-Types/TINYINT.md | 33 - .../sql-reference-v2/Data-Types/VARCHAR.md | 35 - .../ENABLE-FEATURE.md | 38 - .../Show-Statements/SHOW-CHARSET.md | 38 - .../Show-Statements/SHOW-COLLATION.md | 38 - .../Show-Statements/SHOW-ENGINES.md | 38 - .../Show-Statements/SHOW-EVENTS.md | 38 - .../Show-Statements/SHOW-OPEN-TABLES.md | 38 - .../Show-Statements/SHOW-PROCEDURE.md | 38 - 
.../Show-Statements/SHOW-QUERY-PROFILE.md | 38 - .../Show-Statements/SHOW-ROLLUP.md | 38 - .../Show-Statements/SHOW-TRIGGERS.md | 38 - .../Show-Statements/SHOW-USER.md | 38 - .../Show-Statements/SHOW-WARNING.md | 38 - .../Show-Statements/SHOW-WHITE-LIST.md | 38 - .../en/summary/Doris System Architecture.md | 28 - new-docs/package-lock.json | 28213 ---------------- new-docs/package.json | 38 - new-docs/zh-CN/README.md | 95 - .../admin-manual/http-actions/cancel-label.md | 59 - .../http-actions/check-reset-rpc-cache.md | 46 - .../http-actions/compaction-action.md | 212 - .../http-actions/connection-action.md | 42 - .../http-actions/fe-get-log-file.md | 80 - .../http-actions/fe/backends-action.md | 70 - .../http-actions/fe/bootstrap-action.md | 125 - .../http-actions/fe/cancel-load-action.md | 96 - .../fe/check-decommission-action.md | 84 - .../fe/check-storage-type-action.md | 84 - .../http-actions/fe/config-action.md | 83 - .../http-actions/fe/connection-action.md | 82 - .../http-actions/fe/get-ddl-stmt-action.md | 92 - .../http-actions/fe/get-load-info-action.md | 95 - .../http-actions/fe/get-load-state.md | 88 - .../http-actions/fe/get-log-file-action.md | 112 - .../http-actions/fe/get-small-file.md | 101 - .../admin-manual/http-actions/fe/ha-action.md | 95 - .../http-actions/fe/hardware-info-action.md | 80 - .../http-actions/fe/health-action.md | 61 - .../http-actions/fe/log-action.md | 98 - .../http-actions/fe/logout-action.md | 59 - .../http-actions/fe/manager/cluster-action.md | 77 - .../http-actions/fe/manager/node-action.md | 435 - .../fe/manager/query-profile-action.md | 308 - .../http-actions/fe/meta-action.md | 60 - .../http-actions/fe/meta-info-action.md | 232 - .../fe/meta-replay-state-action.md | 61 - .../http-actions/fe/profile-action.md | 80 - .../http-actions/fe/query-detail-action.md | 116 - .../http-actions/fe/query-profile-action.md | 108 - .../http-actions/fe/row-count-action.md | 84 - .../http-actions/fe/session-action.md | 75 - .../http-actions/fe/set-config-action.md | 149 - .../http-actions/fe/show-data-action.md | 111 - .../http-actions/fe/show-meta-info-action.md | 138 - .../http-actions/fe/show-proc-action.md | 104 - .../fe/show-runtime-info-action.md | 84 - .../fe/statement-execution-action.md | 102 - .../http-actions/fe/system-action.md | 93 - .../fe/table-query-plan-action.md | 113 - .../http-actions/fe/table-row-count-action.md | 88 - .../http-actions/fe/table-schema-action.md | 155 - .../http-actions/fe/upload-action.md | 159 - .../http-actions/get-load-state.md | 59 - .../admin-manual/http-actions/get-tablets.md | 65 - .../http-actions/profile-action.md | 120 - .../http-actions/query-detail-action.md | 59 - .../http-actions/restore-tablet.md | 43 - .../http-actions/show-data-action.md | 35 - .../http-actions/tablet-migration-action.md | 87 - .../http-actions/tablets_distribution.md | 124 - .../maint-monitor/monitor-alert.md | 309 - .../monitor-metrics/be-metrics.md | 84 - .../monitor-metrics/fe-metrics.md | 155 - .../maint-monitor/tablet-meta-tool.md | 146 - .../maint-monitor/tablet-restore-tool.md | 142 - new-docs/zh-CN/admin-manual/query-profile.md | 27 - .../approx_count_distinct.md | 48 - .../sql-functions/aggregate-functions/avg.md | 57 - .../aggregate-functions/bitmap_union.md | 146 - .../aggregate-functions/count.md | 61 - .../aggregate-functions/group_concat.md | 70 - .../aggregate-functions/hll_union_agg.md | 52 - .../sql-functions/aggregate-functions/max.md | 46 - .../sql-functions/aggregate-functions/min.md | 46 - 
.../aggregate-functions/percentile.md | 57 - .../aggregate-functions/percentile_approx.md | 59 - .../aggregate-functions/stddev.md | 53 - .../aggregate-functions/stddev_samp.md | 46 - .../sql-functions/aggregate-functions/sum.md | 46 - .../sql-functions/aggregate-functions/topn.md | 60 - .../aggregate-functions/var_samp.md | 47 - .../aggregate-functions/variance.md | 54 - .../bitmap-functions/bitmap_and.md | 83 - .../bitmap-functions/bitmap_and_count.md | 84 - .../bitmap-functions/bitmap_and_not.md | 48 - .../bitmap-functions/bitmap_and_not_count.md | 48 - .../bitmap-functions/bitmap_contains.md | 55 - .../bitmap-functions/bitmap_empty.md | 52 - .../bitmap-functions/bitmap_from_string.md | 63 - .../bitmap-functions/bitmap_has_all.md | 56 - .../bitmap-functions/bitmap_has_any.md | 55 - .../bitmap-functions/bitmap_hash.md | 52 - .../bitmap-functions/bitmap_intersect.md | 62 - .../bitmap-functions/bitmap_max.md | 55 - .../bitmap-functions/bitmap_min.md | 55 - .../bitmap-functions/bitmap_not.md | 55 - .../bitmap-functions/bitmap_or.md | 83 - .../bitmap-functions/bitmap_or_count.md | 77 - .../bitmap_subset_in_range.md | 57 - .../bitmap-functions/bitmap_subset_limit.md | 59 - .../bitmap-functions/bitmap_to_string.md | 69 - .../bitmap-functions/bitmap_union.md | 59 - .../bitmap-functions/bitmap_xor.md | 76 - .../bitmap-functions/bitmap_xor_count.md | 84 - .../orthogonal_bitmap_intersect.md | 47 - .../orthogonal_bitmap_intersect_count.md | 46 - .../orthogonal_bitmap_union_count.md | 47 - .../bitmap-functions/sub_bitmap.md | 62 - .../bitmap-functions/to_bitmap.md | 61 - .../sql-functions/bitwise-functions/bitand.md | 57 - .../sql-functions/bitwise-functions/bitnot.md | 57 - .../sql-functions/bitwise-functions/bitor.md | 57 - .../sql-functions/bitwise-functions/bitxor.md | 57 - .../zh-CN/sql-manual/sql-functions/cast.md | 82 - .../conditional-functions/case.md | 72 - .../conditional-functions/coalesce.md | 47 - .../sql-functions/conditional-functions/if.md | 50 - .../conditional-functions/ifnull.md | 54 - .../conditional-functions/nullif.md | 61 - .../date-time-functions/convert_tz.md | 55 - .../date-time-functions/curdate.md | 55 - .../date-time-functions/current_timestamp.md | 49 - .../date-time-functions/curtime.md | 50 - .../date-time-functions/date_add.md | 55 - .../date-time-functions/date_format.md | 168 - .../date-time-functions/date_sub.md | 55 - .../date-time-functions/datediff.md | 58 - .../sql-functions/date-time-functions/day.md | 49 - .../date-time-functions/dayname.md | 50 - .../date-time-functions/dayofmonth.md | 51 - .../date-time-functions/dayofweek.md | 58 - .../date-time-functions/dayofyear.md | 51 - .../date-time-functions/from_days.md | 49 - .../date-time-functions/from_unixtime.md | 80 - .../sql-functions/date-time-functions/hour.md | 49 - .../date-time-functions/makedate.md | 46 - .../date-time-functions/minute.md | 49 - .../date-time-functions/month.md | 51 - .../date-time-functions/monthname.md | 51 - .../sql-functions/date-time-functions/now.md | 49 - .../date-time-functions/second.md | 49 - .../date-time-functions/str_to_date.md | 72 - .../date-time-functions/time_round.md | 86 - .../date-time-functions/timediff.md | 65 - .../date-time-functions/timestampadd.md | 59 - .../date-time-functions/timestampdiff.md | 67 - .../date-time-functions/to_date.md | 48 - .../date-time-functions/to_days.md | 51 - .../date-time-functions/unix_timestamp.md | 86 - .../date-time-functions/utc_timestamp.md | 53 - .../sql-functions/date-time-functions/week.md | 68 - 
.../date-time-functions/weekday.md | 68 - .../date-time-functions/weekofyear.md | 52 - .../sql-functions/date-time-functions/year.md | 51 - .../date-time-functions/yearweek.md | 80 - .../sql-functions/digital-masking.md | 56 - .../hash-functions/murmur_hash3_32.md | 61 - .../json-functions/get_json_double.md | 74 - .../json-functions/get_json_int.md | 74 - .../json-functions/get_json_string.md | 84 - .../json-functions/json_array.md | 70 - .../json-functions/json_object.md | 70 - .../json-functions/json_quote.md | 70 - .../sql-functions/math-functions/conv.md | 60 - .../sql-functions/math-functions/pmod.md | 53 - .../spatial-functions/st_astext.md | 47 - .../spatial-functions/st_circle.md | 48 - .../spatial-functions/st_contains.md | 54 - .../spatial-functions/st_distance_sphere.md | 47 - .../spatial-functions/st_geometryfromtext.md | 47 - .../spatial-functions/st_linefromtext.md | 47 - .../spatial-functions/st_point.md | 48 - .../spatial-functions/st_polygon.md | 47 - .../sql-functions/spatial-functions/st_x.md | 47 - .../sql-functions/spatial-functions/st_y.md | 47 - .../append_trailing_char_if_absent.md | 60 - .../sql-functions/string-functions/ascii.md | 54 - .../string-functions/bit_length.md | 54 - .../string-functions/char_length.md | 54 - .../sql-functions/string-functions/concat.md | 61 - .../string-functions/concat_ws.md | 63 - .../string-functions/ends_with.md | 53 - .../string-functions/find_in_set.md | 47 - .../sql-functions/string-functions/hex.md | 85 - .../sql-functions/string-functions/instr.md | 54 - .../sql-functions/string-functions/lcase.md | 37 - .../sql-functions/string-functions/left.md | 47 - .../sql-functions/string-functions/length.md | 54 - .../string-functions/like/like.md | 83 - .../string-functions/like/not_like.md | 85 - .../sql-functions/string-functions/locate.md | 61 - .../sql-functions/string-functions/lower.md | 47 - .../sql-functions/string-functions/lpad.md | 54 - .../sql-functions/string-functions/ltrim.md | 47 - .../string-functions/money_format.md | 61 - .../string-functions/null_or_empty.md | 60 - .../string-functions/regexp/not_regexp.md | 56 - .../string-functions/regexp/regexp.md | 56 - .../string-functions/regexp/regexp_extract.md | 51 - .../string-functions/regexp/regexp_replace.md | 54 - .../sql-functions/string-functions/repeat.md | 54 - .../sql-functions/string-functions/replace.md | 46 - .../sql-functions/string-functions/reverse.md | 56 - .../sql-functions/string-functions/right.md | 47 - .../sql-functions/string-functions/rpad.md | 54 - .../string-functions/split_part.md | 69 - .../string-functions/starts_with.md | 53 - .../sql-functions/string-functions/strleft.md | 47 - .../string-functions/strright.md | 47 - .../string-functions/substring.md | 72 - .../sql-functions/string-functions/unhex.md | 63 - .../table-functions/explode-bitmap.md | 157 - .../table-functions/explode-json-array.md | 286 - .../table-functions/explode-numbers.md | 57 - .../table-functions/explode-split.md | 112 - .../sql-functions/window-function.md | 487 - .../ALTER-SYSTEM-MODIFY-BROKER.md | 38 - .../sql-reference-v2/Data-Types/BIGINT.md | 34 - .../sql-reference-v2/Data-Types/BITMAP.md | 48 - .../sql-reference-v2/Data-Types/BOOLEAN.md | 34 - .../sql-reference-v2/Data-Types/CHAR.md | 34 - .../sql-reference-v2/Data-Types/DATE.md | 42 - .../sql-reference-v2/Data-Types/DATETIME.md | 35 - .../sql-reference-v2/Data-Types/DECIMAL.md | 35 - .../sql-reference-v2/Data-Types/DOUBLE.md | 34 - .../sql-reference-v2/Data-Types/FLOAT.md | 34 - 
.../sql-reference-v2/Data-Types/HLL.md | 48 - .../sql-reference-v2/Data-Types/INT.md | 34 - .../sql-reference-v2/Data-Types/LARGEINT.md | 33 - .../Data-Types/QUANTILE_STATE.md | 58 - .../sql-reference-v2/Data-Types/SMALLINT.md | 34 - .../sql-reference-v2/Data-Types/STRING.md | 36 - .../sql-reference-v2/Data-Types/TINYINT.md | 34 - .../sql-reference-v2/Data-Types/VARCHAR.md | 36 - .../ENABLE-FEATURE.md | 38 - .../Show-Statements/SHOW-CHARSET.md | 38 - .../Show-Statements/SHOW-COLLATION.md | 38 - .../Show-Statements/SHOW-PROCEDURE.md | 38 - .../Show-Statements/SHOW-QUERY-PROFILE.md | 38 - .../Show-Statements/SHOW-ROLLUP.md | 38 - .../Show-Statements/SHOW-TRIGGERS.md | 38 - .../Show-Statements/SHOW-WARNING.md | 38 - .../Show-Statements/SHOW-WHITE-LIST.md | 38 - 2222 files changed, 2026 insertions(+), 151178 deletions(-) delete mode 100644 docs/.markdownlintignore rename {new-docs => docs}/en/admin-manual/cluster-management/elastic-expansion.md (100%) rename {new-docs => docs}/en/admin-manual/cluster-management/load-balancing.md (100%) rename {new-docs => docs}/en/admin-manual/cluster-management/upgrade.md (100%) rename {new-docs => docs}/en/admin-manual/config/be-config.md (100%) rename {new-docs => docs}/en/admin-manual/config/fe-config.md (100%) rename {new-docs => docs}/en/admin-manual/config/user-property.md (100%) rename {new-docs => docs}/en/admin-manual/data-admin/backup.md (100%) rename {new-docs => docs}/en/admin-manual/data-admin/delete-recover.md (100%) rename {new-docs => docs}/en/admin-manual/data-admin/restore.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/cancel-label.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/check-reset-rpc-cache.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/compaction-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/connection-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe-get-log-file.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/backends-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/bootstrap-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/cancel-load-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/check-decommission-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/check-storage-type-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/config-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/connection-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/get-ddl-stmt-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/get-load-info-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/get-load-state.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/get-log-file-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/get-small-file.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/ha-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/hardware-info-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/health-action.md (100%) rename docs/en/{administrator-guide => 
admin-manual}/http-actions/fe/log-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/logout-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/manager/cluster-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/manager/node-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/manager/query-profile-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/meta-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/meta-info-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/meta-replay-state-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/profile-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/query-detail-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/query-profile-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/row-count-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/session-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/set-config-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/show-data-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/show-meta-info-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/show-proc-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/show-runtime-info-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/statement-execution-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/system-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/table-query-plan-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/table-row-count-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/table-schema-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/fe/upload-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/get-load-state.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/get-tablets.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/profile-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/query-detail-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/restore-tablet.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/show-data-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/tablet-migration-action.md (100%) rename docs/en/{administrator-guide => admin-manual}/http-actions/tablets_distribution.md (100%) rename {new-docs => docs}/en/admin-manual/maint-monitor/be-olap-error-code.md (100%) rename {new-docs => docs}/en/admin-manual/maint-monitor/disk-capacity.md (100%) rename docs/en/{administrator-guide/operation => admin-manual/maint-monitor}/doris-error-code.md (100%) rename {new-docs => docs}/en/admin-manual/maint-monitor/metadata-operation.md (100%) rename docs/en/{administrator-guide/operation => admin-manual/maint-monitor}/monitor-alert.md (100%) rename 
docs/en/{administrator-guide/operation => admin-manual/maint-monitor}/monitor-metrics/be-metrics.md (100%) rename docs/en/{administrator-guide/operation => admin-manual/maint-monitor}/monitor-metrics/fe-metrics.md (100%) rename docs/en/{administrator-guide/operation => admin-manual/maint-monitor}/multi-tenant.md (100%) rename {new-docs => docs}/en/admin-manual/maint-monitor/tablet-meta-tool.md (100%) rename {new-docs => docs}/en/admin-manual/maint-monitor/tablet-repair-and-balance.md (100%) rename docs/en/{administrator-guide/operation => admin-manual/maint-monitor}/tablet-restore-tool.md (100%) rename {new-docs => docs}/en/admin-manual/multi-tenant.md (100%) rename {new-docs => docs}/en/admin-manual/optimization.md (100%) rename {new-docs => docs}/en/admin-manual/privilege-ldap/ldap.md (100%) rename {new-docs => docs}/en/admin-manual/privilege-ldap/user-privilege.md (100%) rename {new-docs => docs}/en/admin-manual/query-profile.md (100%) rename {new-docs => docs}/en/admin-manual/sql-interception.md (100%) delete mode 100644 docs/en/administrator-guide/alter-table/alter-table-bitmap-index.md delete mode 100644 docs/en/administrator-guide/alter-table/alter-table-replace-table.md delete mode 100644 docs/en/administrator-guide/alter-table/alter-table-rollup.md delete mode 100644 docs/en/administrator-guide/alter-table/alter-table-schema-change.md delete mode 100644 docs/en/administrator-guide/alter-table/alter-table-temp-partition.md delete mode 100644 docs/en/administrator-guide/backup-restore.md delete mode 100644 docs/en/administrator-guide/block-rule/sql-block.md delete mode 100644 docs/en/administrator-guide/broker.md delete mode 100644 docs/en/administrator-guide/bucket-shuffle-join.md delete mode 100644 docs/en/administrator-guide/colocation-join.md delete mode 100644 docs/en/administrator-guide/config/be_config.md delete mode 100644 docs/en/administrator-guide/config/fe_config.md delete mode 100644 docs/en/administrator-guide/config/user_property.md delete mode 100644 docs/en/administrator-guide/dynamic-partition.md delete mode 100644 docs/en/administrator-guide/export-manual.md delete mode 100644 docs/en/administrator-guide/ldap.md delete mode 100644 docs/en/administrator-guide/load-data/batch-delete-manual.md delete mode 100644 docs/en/administrator-guide/load-data/binlog-load-manual.md delete mode 100644 docs/en/administrator-guide/load-data/broker-load-manual.md delete mode 100644 docs/en/administrator-guide/load-data/delete-manual.md delete mode 100644 docs/en/administrator-guide/load-data/insert-into-manual.md delete mode 100644 docs/en/administrator-guide/load-data/load-json-format.md delete mode 100644 docs/en/administrator-guide/load-data/load-manual.md delete mode 100644 docs/en/administrator-guide/load-data/routine-load-manual.md delete mode 100644 docs/en/administrator-guide/load-data/s3-load-manual.md delete mode 100644 docs/en/administrator-guide/load-data/sequence-column-manual.md delete mode 100644 docs/en/administrator-guide/load-data/spark-load-manual.md delete mode 100644 docs/en/administrator-guide/load-data/stream-load-manual.md delete mode 100644 docs/en/administrator-guide/materialized_view.md delete mode 100644 docs/en/administrator-guide/multi-tenant.md delete mode 100644 docs/en/administrator-guide/operation/be-olap-error-code.md delete mode 100644 docs/en/administrator-guide/operation/disk-capacity.md delete mode 100644 docs/en/administrator-guide/operation/metadata-operation.md delete mode 100644 docs/en/administrator-guide/operation/tablet-meta-tool.md 
delete mode 100644 docs/en/administrator-guide/operation/tablet-repair-and-balance.md delete mode 100644 docs/en/administrator-guide/outfile.md delete mode 100644 docs/en/administrator-guide/privilege.md delete mode 100644 docs/en/administrator-guide/query_cache.md delete mode 100644 docs/en/administrator-guide/resource-management.md delete mode 100644 docs/en/administrator-guide/running-profile.md delete mode 100644 docs/en/administrator-guide/runtime-filter.md delete mode 100644 docs/en/administrator-guide/small-file-mgr.md delete mode 100644 docs/en/administrator-guide/sql-mode.md delete mode 100644 docs/en/administrator-guide/time-zone.md delete mode 100644 docs/en/administrator-guide/update.md delete mode 100644 docs/en/administrator-guide/variables.md rename {new-docs => docs}/en/advanced/alter-table/replace-table.md (100%) rename {new-docs => docs}/en/advanced/alter-table/schema-change.md (100%) rename {new-docs => docs}/en/advanced/best-practice/debug-log.md (100%) rename {new-docs => docs}/en/advanced/best-practice/import-analysis.md (100%) rename {new-docs => docs}/en/advanced/best-practice/query-analysis.md (100%) rename {new-docs => docs}/en/advanced/broker.md (100%) rename {new-docs => docs}/en/advanced/cache/partition-cache.md (100%) rename {new-docs => docs}/en/advanced/cache/query-cache.md (100%) rename {new-docs => docs}/en/advanced/join-optimization/bucket-shuffle-join.md (100%) rename {new-docs => docs}/en/advanced/join-optimization/colocation-join.md (100%) rename {new-docs => docs}/en/advanced/join-optimization/runtime-filter.md (100%) rename {new-docs => docs}/en/advanced/materialized-view.md (100%) rename docs/en/{administrator-guide => advanced}/orthogonal-bitmap-manual.md (100%) rename {new-docs => docs}/en/advanced/orthogonal-hll-manual.md (100%) rename {new-docs => docs}/en/advanced/partition/dynamic-partition.md (100%) rename {new-docs => docs}/en/advanced/partition/table-temp-partition.md (100%) rename {new-docs => docs}/en/advanced/resource.md (100%) rename {new-docs => docs}/en/advanced/small-file-mgr.md (100%) rename {new-docs => docs}/en/advanced/time-zone.md (100%) rename {new-docs => docs}/en/advanced/variables.md (100%) rename docs/en/{administrator-guide => advanced}/vectorized-execution-engine.md (100%) delete mode 100644 docs/en/benchmark/samples.md rename {new-docs => docs}/en/benchmark/ssb.md (100%) delete mode 100644 docs/en/benchmark/star-schema-benchmark.md delete mode 100644 docs/en/benchmark/systemd.md rename {new-docs => docs}/en/benchmark/tpc-h.md (100%) rename {new-docs => docs}/en/data-operate/export/export-manual.md (100%) rename docs/en/{administrator-guide => data-operate/export}/export_with_mysql_dump.md (100%) rename {new-docs => docs}/en/data-operate/export/outfile.md (100%) rename {new-docs => docs}/en/data-operate/import/import-scenes/external-storage-load.md (100%) rename {new-docs => docs}/en/data-operate/import/import-scenes/external-table-load.md (100%) rename {new-docs => docs}/en/data-operate/import/import-scenes/jdbc-load.md (100%) rename {new-docs => docs}/en/data-operate/import/import-scenes/kafka-load.md (100%) rename {new-docs => docs}/en/data-operate/import/import-scenes/load-atomicity.md (100%) rename {new-docs => docs}/en/data-operate/import/import-scenes/load-data-convert.md (100%) rename {new-docs => docs}/en/data-operate/import/import-scenes/load-strict-mode.md (100%) rename {new-docs => docs}/en/data-operate/import/import-scenes/local-file-load.md (100%) rename {new-docs => 
docs}/en/data-operate/import/import-way/binlog-load-manual.md (100%) rename {new-docs => docs}/en/data-operate/import/import-way/broker-load-manual.md (100%) rename {new-docs => docs}/en/data-operate/import/import-way/insert-into-manual.md (100%) rename {new-docs => docs}/en/data-operate/import/import-way/load-json-format.md (100%) rename {new-docs => docs}/en/data-operate/import/import-way/routine-load-manual.md (100%) rename {new-docs => docs}/en/data-operate/import/import-way/s3-load-manual.md (100%) rename {new-docs => docs}/en/data-operate/import/import-way/spark-load-manual.md (100%) rename {new-docs => docs}/en/data-operate/import/import-way/stream-load-manual.md (100%) rename {new-docs => docs}/en/data-operate/import/load-manual.md (100%) rename {new-docs => docs}/en/data-operate/update-delete/batch-delete-manual.md (100%) rename {new-docs => docs}/en/data-operate/update-delete/delete-manual.md (100%) rename {new-docs => docs}/en/data-operate/update-delete/sequence-column-manual.md (100%) rename {new-docs => docs}/en/data-operate/update-delete/update.md (100%) rename {new-docs => docs}/en/data-table/advance-usage.md (100%) rename {new-docs => docs}/en/data-table/basic-usage.md (100%) rename {new-docs => docs}/en/data-table/best-practice.md (100%) rename {new-docs => docs}/en/data-table/data-model.md (100%) rename {new-docs => docs}/en/data-table/data-partition.md (100%) rename {new-docs => docs}/en/data-table/hit-the-rollup.md (100%) rename {new-docs => docs}/en/data-table/index/bitmap-index.md (100%) rename docs/en/{administrator-guide => data-table/index}/bloomfilter.md (100%) rename {new-docs => docs}/en/data-table/index/prefix-index.md (100%) rename docs/en/{extending-doris => ecosystem}/audit-plugin.md (100%) rename {new-docs => docs}/en/ecosystem/datax.md (100%) create mode 100644 docs/en/ecosystem/doris-manager/cluster-managenent.md create mode 100644 docs/en/ecosystem/doris-manager/compiling-deploying.md create mode 100644 docs/en/ecosystem/doris-manager/initializing.md create mode 100644 docs/en/ecosystem/doris-manager/space-list.md create mode 100644 docs/en/ecosystem/doris-manager/space-management.md create mode 100644 docs/en/ecosystem/doris-manager/system-settings.md rename {new-docs => docs}/en/ecosystem/external-table/doris-on-es.md (100%) rename {new-docs => docs}/en/ecosystem/external-table/hive-of-doris.md (100%) rename {new-docs => docs}/en/ecosystem/external-table/iceberg-of-doris.md (100%) rename {new-docs => docs}/en/ecosystem/external-table/odbc-of-doris.md (100%) rename {new-docs => docs}/en/ecosystem/flink-doris-connector.md (100%) rename {new-docs => docs}/en/ecosystem/logstash.md (100%) rename docs/en/{extending-doris => ecosystem}/plugin-development-manual.md (100%) rename {new-docs => docs}/en/ecosystem/seatunnel/flink-sink.md (100%) rename {new-docs => docs}/en/ecosystem/seatunnel/spark-sink.md (100%) rename {new-docs => docs}/en/ecosystem/spark-doris-connector.md (100%) rename {new-docs => docs}/en/ecosystem/udf/contribute-udf.md (100%) rename {new-docs => docs}/en/ecosystem/udf/native-user-defined-function.md (100%) rename {new-docs => docs}/en/ecosystem/udf/remote-user-defined-function.md (100%) delete mode 100644 docs/en/extending-doris/datax.md delete mode 100644 docs/en/extending-doris/doris-on-es.md delete mode 100644 docs/en/extending-doris/flink-doris-connector.md delete mode 100644 docs/en/extending-doris/hive-bitmap-udf.md delete mode 100644 docs/en/extending-doris/hive-of-doris.md delete mode 100644 
docs/en/extending-doris/iceberg-of-doris.md delete mode 100644 docs/en/extending-doris/logstash.md delete mode 100644 docs/en/extending-doris/odbc-of-doris.md delete mode 100644 docs/en/extending-doris/seatunnel/flink-sink.md delete mode 100644 docs/en/extending-doris/seatunnel/spark-sink.md delete mode 100644 docs/en/extending-doris/spark-doris-connector.md delete mode 100644 docs/en/extending-doris/udf/contribute-udf.md delete mode 100644 docs/en/extending-doris/udf/java-user-defined-function.md delete mode 100644 docs/en/extending-doris/udf/native-user-defined-function.md delete mode 100644 docs/en/extending-doris/udf/remote-user-defined-function.md rename {new-docs => docs}/en/faq/data-faq.md (100%) delete mode 100644 docs/en/faq/error.md delete mode 100644 docs/en/faq/faq.md rename {new-docs => docs}/en/faq/install-faq.md (100%) rename {new-docs => docs}/en/faq/sql-faq.md (100%) rename {new-docs => docs}/en/get-starting/get-starting.md (99%) delete mode 100644 docs/en/getting-started/advance-usage.md delete mode 100644 docs/en/getting-started/basic-usage.md delete mode 100644 docs/en/getting-started/best-practice.md delete mode 100644 docs/en/getting-started/data-model-rollup.md delete mode 100644 docs/en/getting-started/data-partition.md delete mode 100644 docs/en/getting-started/hit-the-rollup.md rename {new-docs => docs}/en/install/install-deploy.md (100%) rename {new-docs => docs}/en/install/source-install/compilation-arm.md (100%) rename docs/en/{installing => install/source-install}/compilation-with-ldb-toolchain.md (100%) rename {new-docs => docs}/en/install/source-install/compilation.md (100%) delete mode 100644 docs/en/installing/compilation-arm.md delete mode 100644 docs/en/installing/compilation.md delete mode 100644 docs/en/installing/install-deploy.md delete mode 100644 docs/en/installing/upgrade.md delete mode 100644 docs/en/internal/Flink doris connector Design.md delete mode 100644 docs/en/internal/doris_storage_optimization.md delete mode 100644 docs/en/internal/grouping_sets_design.md delete mode 100644 docs/en/internal/metadata-design.md rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/approx_count_distinct.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/avg.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/bitmap_union.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/count.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/group_concat.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/hll_union_agg.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/max.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/max_by.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/min.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/min_by.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/percentile.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/percentile_approx.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/stddev.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/stddev_samp.md (100%) rename docs/en/{sql-reference => 
sql-manual}/sql-functions/aggregate-functions/sum.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/topn.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/var_samp.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/aggregate-functions/variance.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_and.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_and_count.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_and_not.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_and_not_count.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_contains.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_empty.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_from_string.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_has_all.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_has_any.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_hash.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_intersect.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_max.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_min.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_not.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_or.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_or_count.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_subset_in_range.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_subset_limit.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_to_string.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_union.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_xor.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_xor_count.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/sub_bitmap.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitmap-functions/to_bitmap.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitwise-functions/bitand.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitwise-functions/bitnot.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitwise-functions/bitor.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/bitwise-functions/bitxor.md (100%) rename docs/en/{sql-reference => 
sql-manual}/sql-functions/cast.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/conditional-functions/case.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/conditional-functions/coalesce.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/conditional-functions/if.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/conditional-functions/ifnull.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/conditional-functions/nullif.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/convert_tz.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/curdate.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/current_timestamp.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/curtime.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/date_add.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/date_format.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/date_sub.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/datediff.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/day.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/dayname.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/dayofmonth.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/dayofweek.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/dayofyear.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/from_days.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/from_unixtime.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/hour.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/makedate.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/minute.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/month.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/monthname.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/now.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/second.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/str_to_date.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/time_round.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/timediff.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/timestampadd.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/timestampdiff.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/to_date.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/to_days.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/unix_timestamp.md (100%) rename docs/en/{sql-reference => 
sql-manual}/sql-functions/date-time-functions/utc_timestamp.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/week.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/weekday.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/weekofyear.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/year.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/date-time-functions/yearweek.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/digital-masking.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/aes.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/md5.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/md5sum.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/sm3.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/sm3sum.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/sm4.md (100%) rename {new-docs/en/sql-manual/sql-functions/encrypt-digest-functions => docs/en/sql-manual/sql-functions/encrypt-dixgest-functions}/aes.md (100%) rename {new-docs/en/sql-manual/sql-functions/encrypt-digest-functions => docs/en/sql-manual/sql-functions/encrypt-dixgest-functions}/md5.md (100%) rename {new-docs/en/sql-manual/sql-functions/encrypt-digest-functions => docs/en/sql-manual/sql-functions/encrypt-dixgest-functions}/md5sum.md (100%) rename {new-docs/en/sql-manual/sql-functions/encrypt-digest-functions => docs/en/sql-manual/sql-functions/encrypt-dixgest-functions}/sm3.md (100%) rename {new-docs/en/sql-manual/sql-functions/encrypt-digest-functions => docs/en/sql-manual/sql-functions/encrypt-dixgest-functions}/sm3sum.md (100%) rename {new-docs/en/sql-manual/sql-functions/encrypt-digest-functions => docs/en/sql-manual/sql-functions/encrypt-dixgest-functions}/sm4.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/hash-functions/murmur_hash3_32.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/json-functions/get_json_double.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/json-functions/get_json_int.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/json-functions/get_json_string.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/json-functions/json_array.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/json-functions/json_object.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/json-functions/json_quote.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/math-functions/conv.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/math-functions/pmod.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_astext.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_circle.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_contains.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_distance_sphere.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_geometryfromtext.md (100%) rename docs/en/{sql-reference => 
sql-manual}/sql-functions/spatial-functions/st_linefromtext.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_point.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_polygon.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_x.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_y.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/append_trailing_char_if_absent.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/ascii.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/bit_length.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/char_length.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/concat.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/concat_ws.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/ends_with.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/find_in_set.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/hex.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/instr.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/lcase.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/left.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/length.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/like/like.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/like/not_like.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/locate.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/lower.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/lpad.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/ltrim.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/money_format.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/null_or_empty.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/regexp/not_regexp.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/regexp/regexp.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/regexp/regexp_extract.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/regexp/regexp_replace.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/repeat.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/replace.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/reverse.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/right.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/rpad.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/split_part.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/starts_with.md (100%) rename 
docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/strleft.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/strright.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/substring.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/string-functions/unhex.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/table-functions/explode-bitmap.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/table-functions/explode-json-array.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/table-functions/explode-numbers.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/table-functions/explode-split.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/table-functions/outer-combinator.md (100%) rename docs/en/{sql-reference => sql-manual}/sql-functions/window-function.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md (100%) rename 
{new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md (100%) rename {new-docs => 
docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/BIGINT.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/BITMAP.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/BOOLEAN.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/CHAR.md (100%) 
rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/DATE.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/DATETIME.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/DECIMAL.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/DOUBLE.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/FLOAT.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/HLL.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/INT.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/QUANTILE_STATE.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/SMALLINT.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/STRING.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/TINYINT.md (100%) rename docs/en/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/VARCHAR.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md (100%) rename docs/en/{sql-reference/sql-statements/Administration/ADMIN SHOW TABLET STORAGE FORMAT.md => sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-TABLET-STORAGE-FORMAT.md} (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Database-Administration-Statements/ENABLE-FEATURE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md (100%) rename 
{new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-CHARSET.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-COLLATION.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-ENGINES.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-EVENTS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-OPEN-TABLES.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md (100%) rename 
docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-PROCEDURE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-QUERY-PROFILE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-ROLLUP.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-TRIGGERS.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-USER.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-WARNING.md (100%) rename docs/en/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-WHITE-LIST.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Utility-Statements/HELP.md (100%) rename {new-docs => docs}/en/sql-manual/sql-reference-v2/Utility-Statements/USE.md (100%) delete mode 100644 docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md delete mode 100644 
docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md delete mode 100644 
docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md delete 
mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md delete mode 100644 
docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Utility-Statements/HELP.md delete mode 100644 docs/en/sql-reference-v2/sql-statements/Utility-Statements/USE.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/CREATE ROLE.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/CREATE USER.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/DROP ROLE.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/DROP USER.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/GRANT.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/REVOKE.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/SET PASSWORD.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/SET PROPERTY.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/SHOW GRANTS.md delete mode 100644 docs/en/sql-reference/sql-statements/Account Management/SHOW ROLES.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN COMPACT.md 
delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN REPAIR.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ALTER CLUSTER.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ALTER SYSTEM.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/CREATE CLUSTER.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/CREATE FILE.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/DROP CLUSTER.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/DROP FILE.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/ENTER.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/LINK DATABASE.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW BACKENDS.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW BROKER.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW FILE.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW INDEX.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW PLUGINS.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW TRASH.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/SHOW VIEW.md delete mode 100644 docs/en/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/ALTER TABLE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/ALTER VIEW.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/BACKUP.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md delete mode 100644 docs/en/sql-reference/sql-statements/Data 
Definition/CANCEL RESTORE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CREATE INDEX.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/CREATE VIEW.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/Colocate Join.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/DROP DATABASE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/DROP INDEX.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/DROP TABLE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/DROP VIEW.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/HLL.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/RECOVER.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/RESTORE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/create-function.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/drop-function.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Definition/show-functions.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/BEGIN.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/GROUP BY.md delete mode 100644 
docs/en/sql-reference/sql-statements/Data Manipulation/LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/OUTFILE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md delete mode 100644 docs/en/sql-reference/sql-statements/Data 
Manipulation/SHOW-LAST-INSERT.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/UPDATE.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/insert.md delete mode 100644 docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md delete mode 100644 docs/en/sql-reference/sql-statements/Utility/util_stmt.md rename {new-docs => docs}/en/summary/basic-summary.md (100%) rename {new-docs => docs}/en/summary/system-architecture.md (95%) rename {new-docs => docs}/zh-CN/admin-manual/cluster-management/elastic-expansion.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/cluster-management/load-balancing.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/cluster-management/upgrade.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/config/be-config.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/config/fe-config.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/config/user-property.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/data-admin/backup.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/data-admin/delete-recover.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/data-admin/restore.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/cancel-label.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/check-reset-rpc-cache.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/compaction-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/connection-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe-get-log-file.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/backends-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/bootstrap-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/cancel-load-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/check-decommission-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/check-storage-type-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/config-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/connection-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/get-ddl-stmt-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/get-load-info-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/get-load-state.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/get-log-file-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/get-small-file.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/ha-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/hardware-info-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/health-action.md 
(100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/log-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/logout-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/manager/cluster-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/manager/node-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/manager/query-profile-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/meta-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/meta-info-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/meta-replay-state-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/profile-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/query-detail-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/query-profile-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/row-count-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/session-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/set-config-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/show-data-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/show-meta-info-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/show-proc-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/show-runtime-info-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/statement-execution-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/system-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/table-query-plan-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/table-row-count-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/table-schema-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/fe/upload-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/get-load-state.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/get-tablets.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/profile-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/query-detail-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/restore-tablet.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/show-data-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/tablet-migration-action.md (100%) rename docs/zh-CN/{administrator-guide => admin-manual}/http-actions/tablets_distribution.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/maint-monitor/be-olap-error-code.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/maint-monitor/disk-capacity.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/maint-monitor/doris-error-code.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/maint-monitor/metadata-operation.md (100%) 
rename docs/zh-CN/{administrator-guide/operation => admin-manual/maint-monitor}/monitor-alert.md (100%) rename docs/zh-CN/{administrator-guide/operation => admin-manual/maint-monitor}/monitor-metrics/be-metrics.md (100%) rename docs/zh-CN/{administrator-guide/operation => admin-manual/maint-monitor}/monitor-metrics/fe-metrics.md (100%) rename docs/zh-CN/{administrator-guide/operation => admin-manual/maint-monitor}/tablet-meta-tool.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/maint-monitor/tablet-repair-and-balance.md (100%) rename docs/zh-CN/{administrator-guide/operation => admin-manual/maint-monitor}/tablet-restore-tool.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/multi-tenant.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/optimization.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/privilege-ldap/ldap.md (100%) rename {new-docs => docs}/zh-CN/admin-manual/privilege-ldap/user-privilege.md (100%) rename docs/zh-CN/{administrator-guide/running-profile.md => admin-manual/query-profile.md} (99%) rename {new-docs => docs}/zh-CN/admin-manual/sql-interception.md (100%) delete mode 100644 docs/zh-CN/administrator-guide/alter-table/alter-table-bitmap-index.md delete mode 100644 docs/zh-CN/administrator-guide/alter-table/alter-table-replace-table.md delete mode 100644 docs/zh-CN/administrator-guide/alter-table/alter-table-rollup.md delete mode 100644 docs/zh-CN/administrator-guide/alter-table/alter-table-schema-change.md delete mode 100644 docs/zh-CN/administrator-guide/alter-table/alter-table-temp-partition.md delete mode 100644 docs/zh-CN/administrator-guide/backup-restore.md delete mode 100644 docs/zh-CN/administrator-guide/block-rule/sql-block.md delete mode 100644 docs/zh-CN/administrator-guide/bloomfilter.md delete mode 100644 docs/zh-CN/administrator-guide/broker.md delete mode 100644 docs/zh-CN/administrator-guide/bucket-shuffle-join.md delete mode 100644 docs/zh-CN/administrator-guide/colocation-join.md delete mode 100644 docs/zh-CN/administrator-guide/config/be_config.md delete mode 100644 docs/zh-CN/administrator-guide/config/fe_config.md delete mode 100644 docs/zh-CN/administrator-guide/config/user_property.md delete mode 100644 docs/zh-CN/administrator-guide/dynamic-partition.md delete mode 100644 docs/zh-CN/administrator-guide/export-manual.md delete mode 100644 docs/zh-CN/administrator-guide/export_with_mysql_dump.md delete mode 100644 docs/zh-CN/administrator-guide/ldap.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/batch-delete-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/broker-load-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/delete-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/insert-into-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/load-json-format.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/load-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/routine-load-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/s3-load-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/sequence-column-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/spark-load-manual.md delete mode 100644 docs/zh-CN/administrator-guide/load-data/stream-load-manual.md delete mode 100644 docs/zh-CN/administrator-guide/materialized_view.md delete mode 100644 
docs/zh-CN/administrator-guide/multi-tenant.md delete mode 100644 docs/zh-CN/administrator-guide/operation/be-olap-error-code.md delete mode 100644 docs/zh-CN/administrator-guide/operation/disk-capacity.md delete mode 100644 docs/zh-CN/administrator-guide/operation/doris-error-code.md delete mode 100644 docs/zh-CN/administrator-guide/operation/metadata-operation.md delete mode 100644 docs/zh-CN/administrator-guide/operation/multi-tenant.md delete mode 100644 docs/zh-CN/administrator-guide/operation/tablet-repair-and-balance.md delete mode 100644 docs/zh-CN/administrator-guide/orthogonal-bitmap-manual.md delete mode 100644 docs/zh-CN/administrator-guide/outfile.md delete mode 100644 docs/zh-CN/administrator-guide/partition_cache.md delete mode 100644 docs/zh-CN/administrator-guide/privilege.md delete mode 100644 docs/zh-CN/administrator-guide/query_cache.md delete mode 100644 docs/zh-CN/administrator-guide/resource-management.md delete mode 100644 docs/zh-CN/administrator-guide/runtime-filter.md delete mode 100644 docs/zh-CN/administrator-guide/segment-v2-usage.md delete mode 100644 docs/zh-CN/administrator-guide/small-file-mgr.md delete mode 100644 docs/zh-CN/administrator-guide/sql-mode.md delete mode 100644 docs/zh-CN/administrator-guide/time-zone.md delete mode 100644 docs/zh-CN/administrator-guide/update.md delete mode 100644 docs/zh-CN/administrator-guide/variables.md rename {new-docs => docs}/zh-CN/advanced/alter-table/replace-table.md (100%) rename {new-docs => docs}/zh-CN/advanced/alter-table/schema-change.md (100%) rename {new-docs => docs}/zh-CN/advanced/best-practice/debug-log.md (100%) rename {new-docs => docs}/zh-CN/advanced/best-practice/import-analysis.md (100%) rename {new-docs => docs}/zh-CN/advanced/best-practice/query-analysis.md (100%) rename {new-docs => docs}/zh-CN/advanced/broker.md (100%) rename {new-docs => docs}/zh-CN/advanced/cache/partition-cache.md (100%) rename {new-docs => docs}/zh-CN/advanced/join-optimization/bucket-shuffle-join.md (100%) rename {new-docs => docs}/zh-CN/advanced/join-optimization/colocation-join.md (100%) rename {new-docs => docs}/zh-CN/advanced/join-optimization/runtime-filter.md (100%) rename {new-docs => docs}/zh-CN/advanced/materialized-view.md (100%) rename {new-docs => docs}/zh-CN/advanced/orthogonal-bitmap-manual.md (100%) rename {new-docs => docs}/zh-CN/advanced/partition/dynamic-partition.md (100%) rename {new-docs => docs}/zh-CN/advanced/partition/table-tmp-partition.md (100%) rename {new-docs => docs}/zh-CN/advanced/resource.md (100%) rename {new-docs => docs}/zh-CN/advanced/small-file-mgr.md (100%) rename {new-docs => docs}/zh-CN/advanced/time-zone.md (100%) rename {new-docs => docs}/zh-CN/advanced/variables.md (100%) rename docs/zh-CN/{administrator-guide => advanced}/vectorized-execution-engine.md (100%) delete mode 100644 docs/zh-CN/benchmark/samples.md rename {new-docs => docs}/zh-CN/benchmark/ssb.md (100%) delete mode 100644 docs/zh-CN/benchmark/star-schema-benchmark.md delete mode 100644 docs/zh-CN/benchmark/systemd.md rename {new-docs => docs}/zh-CN/benchmark/tpc-h.md (100%) rename {new-docs => docs}/zh-CN/data-operate/export/export-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/export/export_with_mysql_dump.md (100%) rename {new-docs => docs}/zh-CN/data-operate/export/outfile.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-scenes/external-storage-load.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-scenes/external-table-load.md (100%) rename {new-docs => 
docs}/zh-CN/data-operate/import/import-scenes/jdbc-load.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-scenes/kafka-load.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-scenes/load-atomicity.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-scenes/load-data-convert.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-scenes/load-strict-mode.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-scenes/local-file-load.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-way/binlog-load-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-way/broker-load-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-way/insert-into-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-way/load-json-format.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-way/routine-load-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-way/s3-load-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-way/spark-load-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/import-way/stream-load-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/import/load-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/update-delete/batch-delete-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/update-delete/delete-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/update-delete/sequence-column-manual.md (100%) rename {new-docs => docs}/zh-CN/data-operate/update-delete/update.md (100%) rename {new-docs => docs}/zh-CN/data-table/advance-usage.md (100%) rename {new-docs => docs}/zh-CN/data-table/basic-usage.md (100%) rename {new-docs => docs}/zh-CN/data-table/best-practice.md (100%) rename {new-docs => docs}/zh-CN/data-table/data-model.md (100%) rename {new-docs => docs}/zh-CN/data-table/data-partition.md (100%) rename {new-docs => docs}/zh-CN/data-table/hit-the-rollup.md (100%) rename {new-docs => docs}/zh-CN/data-table/index/bitmap-index.md (100%) rename {new-docs => docs}/zh-CN/data-table/index/bloomfilter.md (100%) rename {new-docs => docs}/zh-CN/data-table/index/prefix-index.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/audit-plugin.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/datax.md (100%) rename docs/zh-CN/{extending-doris => ecosystem}/doris-manager/cluster-managenent.md (100%) rename docs/zh-CN/{extending-doris => ecosystem}/doris-manager/compiling-deploying.md (100%) rename docs/zh-CN/{extending-doris => ecosystem}/doris-manager/initializing.md (100%) rename docs/zh-CN/{extending-doris => ecosystem}/doris-manager/space-list.md (100%) rename docs/zh-CN/{extending-doris => ecosystem}/doris-manager/space-management.md (100%) rename docs/zh-CN/{extending-doris => ecosystem}/doris-manager/system-settings.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/external-table/doris-on-es.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/external-table/hive-of-doris.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/external-table/iceberg-of-doris.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/external-table/odbc-of-doris.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/flink-doris-connector.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/logstash.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/plugin-development-manual.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/seatunnel/flink-sink.md (100%) 
rename {new-docs => docs}/zh-CN/ecosystem/seatunnel/spark-sink.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/spark-doris-connector.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/udf/contribute-udf.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/udf/native-user-defined-function.md (100%) rename {new-docs => docs}/zh-CN/ecosystem/udf/remote-user-defined-function.md (100%) delete mode 100644 docs/zh-CN/extending-doris/audit-plugin.md delete mode 100644 docs/zh-CN/extending-doris/datax.md delete mode 100644 docs/zh-CN/extending-doris/doris-on-es.md delete mode 100644 docs/zh-CN/extending-doris/flink-doris-connector.md delete mode 100644 docs/zh-CN/extending-doris/hive-bitmap-udf.md delete mode 100644 docs/zh-CN/extending-doris/hive-of-doris.md delete mode 100644 docs/zh-CN/extending-doris/iceberg-of-doris.md delete mode 100644 docs/zh-CN/extending-doris/logstash.md delete mode 100644 docs/zh-CN/extending-doris/odbc-of-doris.md delete mode 100644 docs/zh-CN/extending-doris/plugin-development-manual.md delete mode 100644 docs/zh-CN/extending-doris/seatunnel/flink-sink.md delete mode 100644 docs/zh-CN/extending-doris/seatunnel/spark-sink.md delete mode 100644 docs/zh-CN/extending-doris/spark-doris-connector.md delete mode 100644 docs/zh-CN/extending-doris/udf/contribute-udf.md delete mode 100644 docs/zh-CN/extending-doris/udf/java-user-defined-function.md delete mode 100644 docs/zh-CN/extending-doris/udf/native-user-defined-function.md delete mode 100644 docs/zh-CN/extending-doris/udf/remote-user-defined-function.md rename {new-docs => docs}/zh-CN/faq/data-faq.md (100%) delete mode 100644 docs/zh-CN/faq/error.md delete mode 100644 docs/zh-CN/faq/faq.md rename {new-docs => docs}/zh-CN/faq/install-faq.md (100%) rename {new-docs => docs}/zh-CN/faq/sql-faq.md (100%) rename {new-docs => docs}/zh-CN/get-starting/get-starting.md (99%) delete mode 100644 docs/zh-CN/getting-started/advance-usage.md delete mode 100644 docs/zh-CN/getting-started/basic-usage.md delete mode 100644 docs/zh-CN/getting-started/best-practice.md delete mode 100644 docs/zh-CN/getting-started/data-model-rollup.md delete mode 100644 docs/zh-CN/getting-started/data-partition.md delete mode 100644 docs/zh-CN/getting-started/hit-the-rollup.md rename {new-docs => docs}/zh-CN/install/install-deploy.md (100%) rename {new-docs => docs}/zh-CN/install/source-install/compilation-arm.md (100%) rename {new-docs => docs}/zh-CN/install/source-install/compilation-with-ldb-toolchain.md (100%) rename {new-docs => docs}/zh-CN/install/source-install/compilation.md (100%) delete mode 100644 docs/zh-CN/installing/compilation-arm.md delete mode 100644 docs/zh-CN/installing/compilation-with-ldb-toolchain.md delete mode 100644 docs/zh-CN/installing/compilation.md delete mode 100644 docs/zh-CN/installing/install-deploy.md delete mode 100644 docs/zh-CN/installing/upgrade.md delete mode 100644 docs/zh-CN/internal/doris_storage_optimization.md delete mode 100644 docs/zh-CN/internal/flink_doris_connector_design.md delete mode 100644 docs/zh-CN/internal/grouping_sets_design.md delete mode 100644 docs/zh-CN/internal/metadata-design.md delete mode 100644 docs/zh-CN/internal/spark_load.md rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/approx_count_distinct.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/avg.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/bitmap_union.md (100%) rename docs/zh-CN/{sql-reference => 
sql-manual}/sql-functions/aggregate-functions/count.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/group_concat.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/hll_union_agg.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/max.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/max_by.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/min.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/min_by.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/percentile.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/percentile_approx.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/stddev.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/stddev_samp.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/sum.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/topn.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/var_samp.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/aggregate-functions/variance.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_and.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_and_count.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_and_not.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_and_not_count.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_contains.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_empty.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_from_string.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_has_all.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_has_any.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_hash.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_intersect.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_max.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_min.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_not.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_or.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_or_count.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_subset_in_range.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_subset_limit.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_to_string.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_union.md 
(100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_xor.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/bitmap_xor_count.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/sub_bitmap.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitmap-functions/to_bitmap.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitwise-functions/bitand.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitwise-functions/bitnot.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitwise-functions/bitor.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/bitwise-functions/bitxor.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/cast.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/conditional-functions/case.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/conditional-functions/coalesce.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/conditional-functions/if.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/conditional-functions/ifnull.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/conditional-functions/nullif.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/convert_tz.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/curdate.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/current_timestamp.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/curtime.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/date_add.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/date_format.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/date_sub.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/datediff.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/day.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/dayname.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/dayofmonth.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/dayofweek.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/dayofyear.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/from_days.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/from_unixtime.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/hour.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/makedate.md (100%) rename docs/zh-CN/{sql-reference => 
sql-manual}/sql-functions/date-time-functions/minute.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/month.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/monthname.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/now.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/second.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/str_to_date.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/time_round.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/timediff.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/timestampadd.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/timestampdiff.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/to_date.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/to_days.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/unix_timestamp.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/utc_timestamp.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/week.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/weekday.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/weekofyear.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/year.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/date-time-functions/yearweek.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/digital-masking.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/aes.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/md5.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/md5sum.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/sm3.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/sm3sum.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/encrypt-digest-functions/sm4.md (100%) rename {new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions => docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions}/aes.md (100%) rename {new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions => docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions}/md5.md (100%) rename {new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions => docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions}/md5sum.md (100%) rename {new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions => docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions}/sm3.md (100%) rename {new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions => docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions}/sm3sum.md (100%) rename {new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions => docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions}/sm4.md (100%) rename docs/zh-CN/{sql-reference => 
sql-manual}/sql-functions/hash-functions/murmur_hash3_32.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/json-functions/get_json_double.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/json-functions/get_json_int.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/json-functions/get_json_string.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/json-functions/json_array.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/json-functions/json_object.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/json-functions/json_quote.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/math-functions/conv.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/math-functions/pmod.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_astext.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_circle.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_contains.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_distance_sphere.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_geometryfromtext.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_linefromtext.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_point.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_polygon.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_x.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/spatial-functions/st_y.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/append_trailing_char_if_absent.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/ascii.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/bit_length.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/char_length.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/concat.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/concat_ws.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/ends_with.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/find_in_set.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/hex.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/instr.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/lcase.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/left.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/length.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/like/like.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/like/not_like.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/locate.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/lower.md (100%) rename 
docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/lpad.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/ltrim.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/money_format.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/null_or_empty.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/regexp/not_regexp.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/regexp/regexp.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/regexp/regexp_extract.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/regexp/regexp_replace.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/repeat.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/replace.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/reverse.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/right.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/rpad.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/split_part.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/starts_with.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/strleft.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/strright.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/substring.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/string-functions/unhex.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/table-functions/explode-bitmap.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/table-functions/explode-json-array.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/table-functions/explode-numbers.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/table-functions/explode-split.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/table-functions/outer-combinator.md (100%) rename docs/zh-CN/{sql-reference => sql-manual}/sql-functions/window-function.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md (100%) rename {new-docs => 
docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md (100%) rename {new-docs => 
docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md (100%) rename {new-docs => 
docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/BIGINT.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/BITMAP.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/BOOLEAN.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/CHAR.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/DATE.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/DATETIME.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/DECIMAL.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/DOUBLE.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/FLOAT.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/HLL.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/INT.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/LARGEINT.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/QUANTILE_STATE.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/SMALLINT.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/STRING.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/TINYINT.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Data Types => sql-manual/sql-reference-v2/Data-Types}/VARCHAR.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md 
(100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md (100%) rename docs/zh-CN/{sql-reference/sql-statements/Administration/ADMIN SHOW TABLET STORAGE FORMAT.md => sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-TABLET-STORAGE-FORMAT.md} (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Database-Administration-Statements/ENABLE-FEATURE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-CHARSET.md (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-COLLATION.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md (100%) rename 
{new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-PROCEDURE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-QUERY-PROFILE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-ROLLUP.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md (100%) rename {new-docs => 
docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-TRIGGERS.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-WARNING.md (100%) rename docs/zh-CN/{sql-reference-v2/sql-statements => sql-manual/sql-reference-v2}/Show-Statements/SHOW-WHITE-LIST.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/HELP.md (100%) rename {new-docs => docs}/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/USE.md (100%) delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md delete mode 100644 
docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md delete mode 100644 
docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md delete mode 100644 
docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENGINES.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-EVENTS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-OPEN-TABLES.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md delete mode 100644 
docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-USER.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/HELP.md delete mode 100644 docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/USE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/CREATE ROLE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/CREATE USER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/DROP ROLE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/DROP USER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/GRANT.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/REVOKE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/SET PASSWORD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/SET PROPERTY.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/SHOW GRANTS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Account Management/SHOW ROLES.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN COMPACT.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN REPAIR.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN 
SHOW CONFIG.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ALTER CLUSTER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ALTER SYSTEM.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/CREATE CLUSTER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/CREATE FILE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/DROP CLUSTER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/DROP FILE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/ENTER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/LINK DATABASE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW BACKENDS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW BROKER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FILE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW INDEX.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW PLUGINS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW TRASH.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/SHOW VIEW.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER TABLE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER VIEW.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/BACKUP.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE INDEX.md delete mode 100644 
docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE TABLE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE VIEW.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP DATABASE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP INDEX.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP TABLE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP VIEW.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/HLL.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/RECOVER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/RESTORE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/create-function.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/drop-function.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Definition/show-functions.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BEGIN.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/DELETE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/EXPORT.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/GROUP BY.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data 
Manipulation/RESUME SYNC JOB.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SPARK LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/UPDATE.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/insert.md 
delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md delete mode 100644 docs/zh-CN/sql-reference/sql-statements/Utility/DESCRIBE.md rename {new-docs => docs}/zh-CN/summary/basic-summary.md (96%) rename {new-docs => docs}/zh-CN/summary/system-architecture.md (96%) delete mode 100644 new-docs/.markdownlint.yml delete mode 100644 new-docs/.vuepress/components/CaseList.vue delete mode 100644 new-docs/.vuepress/config.js delete mode 100644 new-docs/.vuepress/sidebar/en.js delete mode 100644 new-docs/.vuepress/sidebar/zh-CN.js delete mode 100644 new-docs/.vuepress/theme/components/Footer.vue delete mode 100644 new-docs/.vuepress/theme/components/Home.vue delete mode 100644 new-docs/.vuepress/theme/index.js delete mode 100644 new-docs/.vuepress/theme/layouts/Article.vue delete mode 100644 new-docs/.vuepress/theme/layouts/ArticleList.vue delete mode 100644 new-docs/.vuepress/theme/layouts/Layout.vue delete mode 100644 new-docs/.vuepress/theme/styles/index.styl delete mode 100644 new-docs/.vuepress/vuepress.textClipping delete mode 100644 new-docs/README.md delete mode 100755 new-docs/build_help_zip.sh delete mode 100644 new-docs/en/README.md delete mode 100644 new-docs/en/admin-manual/http-actions/cancel-label.md delete mode 100644 new-docs/en/admin-manual/http-actions/check-reset-rpc-cache.md delete mode 100644 new-docs/en/admin-manual/http-actions/compaction-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/connection-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe-get-log-file.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/backends-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/bootstrap-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/cancel-load-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/check-decommission-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/check-storage-type-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/config-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/connection-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/get-ddl-stmt-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/get-load-info-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/get-load-state.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/get-log-file-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/get-small-file.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/ha-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/hardware-info-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/health-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/log-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/logout-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/manager/cluster-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/manager/node-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/manager/query-profile-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/meta-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/meta-info-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/meta-replay-state-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/profile-action.md delete mode 100644 
new-docs/en/admin-manual/http-actions/fe/query-detail-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/query-profile-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/row-count-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/session-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/set-config-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/show-data-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/show-meta-info-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/show-proc-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/show-runtime-info-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/statement-execution-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/system-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/table-query-plan-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/table-row-count-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/table-schema-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/fe/upload-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/get-load-state.md delete mode 100644 new-docs/en/admin-manual/http-actions/get-tablets.md delete mode 100644 new-docs/en/admin-manual/http-actions/profile-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/query-detail-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/restore-tablet.md delete mode 100644 new-docs/en/admin-manual/http-actions/show-data-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/tablet-migration-action.md delete mode 100644 new-docs/en/admin-manual/http-actions/tablets_distribution.md delete mode 100644 new-docs/en/admin-manual/maint-monitor/doris-error-code.md delete mode 100644 new-docs/en/admin-manual/maint-monitor/monitor-alert.md delete mode 100644 new-docs/en/admin-manual/maint-monitor/monitor-metrics/be-metrics.md delete mode 100644 new-docs/en/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md delete mode 100644 new-docs/en/admin-manual/maint-monitor/multi-tenant.md delete mode 100644 new-docs/en/admin-manual/maint-monitor/tablet-restore-tool.md delete mode 100644 new-docs/en/advanced/orthogonal-bitmap-manual.md delete mode 100644 new-docs/en/data-operate/export/export_with_mysql_dump.md delete mode 100644 new-docs/en/data-table/index/bloomfilter.md delete mode 100644 new-docs/en/ecosystem/audit-plugin.md delete mode 100644 new-docs/en/ecosystem/plugin-development-manual.md delete mode 100644 new-docs/en/install/source-install/compilation-with-ldb-toolchain.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/approx_count_distinct.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/avg.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/bitmap_union.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/count.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/group_concat.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/hll_union_agg.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/max.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/min.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/percentile.md delete 
mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/percentile_approx.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/stddev.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/stddev_samp.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/sum.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/topn.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/var_samp.md delete mode 100644 new-docs/en/sql-manual/sql-functions/aggregate-functions/variance.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_count.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not_count.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_contains.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_empty.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_from_string.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_all.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_any.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_hash.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_intersect.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_max.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_min.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_not.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or_count.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_in_range.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_limit.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_to_string.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_union.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor_count.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/sub_bitmap.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitmap-functions/to_bitmap.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitwise-functions/bitand.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitwise-functions/bitnot.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitwise-functions/bitor.md delete mode 100644 new-docs/en/sql-manual/sql-functions/bitwise-functions/bitxor.md delete mode 100644 new-docs/en/sql-manual/sql-functions/cast.md delete mode 100644 
new-docs/en/sql-manual/sql-functions/conditional-functions/case.md delete mode 100644 new-docs/en/sql-manual/sql-functions/conditional-functions/coalesce.md delete mode 100644 new-docs/en/sql-manual/sql-functions/conditional-functions/if.md delete mode 100644 new-docs/en/sql-manual/sql-functions/conditional-functions/ifnull.md delete mode 100644 new-docs/en/sql-manual/sql-functions/conditional-functions/nullif.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/convert_tz.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/curdate.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/current_timestamp.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/curtime.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/date_add.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/date_format.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/date_sub.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/datediff.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/day.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/dayname.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/dayofmonth.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/dayofweek.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/dayofyear.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/from_days.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/from_unixtime.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/hour.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/makedate.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/minute.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/month.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/monthname.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/now.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/second.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/str_to_date.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/time_round.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/timediff.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/timestampadd.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/timestampdiff.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/to_date.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/to_days.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/unix_timestamp.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/utc_timestamp.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/week.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/weekday.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/weekofyear.md delete mode 100644 new-docs/en/sql-manual/sql-functions/date-time-functions/year.md delete mode 100644 
new-docs/en/sql-manual/sql-functions/date-time-functions/yearweek.md delete mode 100644 new-docs/en/sql-manual/sql-functions/digital-masking.md delete mode 100644 new-docs/en/sql-manual/sql-functions/hash-functions/murmur_hash3_32.md delete mode 100644 new-docs/en/sql-manual/sql-functions/json-functions/get_json_double.md delete mode 100644 new-docs/en/sql-manual/sql-functions/json-functions/get_json_int.md delete mode 100644 new-docs/en/sql-manual/sql-functions/json-functions/get_json_string.md delete mode 100644 new-docs/en/sql-manual/sql-functions/json-functions/json_array.md delete mode 100644 new-docs/en/sql-manual/sql-functions/json-functions/json_object.md delete mode 100644 new-docs/en/sql-manual/sql-functions/json-functions/json_quote.md delete mode 100644 new-docs/en/sql-manual/sql-functions/math-functions/conv.md delete mode 100644 new-docs/en/sql-manual/sql-functions/math-functions/pmod.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_astext.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_circle.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_contains.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_distance_sphere.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_geometryfromtext.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_linefromtext.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_point.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_polygon.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_x.md delete mode 100644 new-docs/en/sql-manual/sql-functions/spatial-functions/st_y.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/append_trailing_char_if_absent.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/ascii.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/bit_length.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/char_length.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/concat.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/concat_ws.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/ends_with.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/find_in_set.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/hex.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/instr.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/lcase.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/left.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/length.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/like/like.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/like/not_like.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/locate.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/lower.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/lpad.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/ltrim.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/money_format.md delete mode 100644 
new-docs/en/sql-manual/sql-functions/string-functions/null_or_empty.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/regexp/not_regexp.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/regexp/regexp.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_replace.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/repeat.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/replace.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/reverse.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/right.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/rpad.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/split_part.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/starts_with.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/strleft.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/strright.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/substring.md delete mode 100644 new-docs/en/sql-manual/sql-functions/string-functions/unhex.md delete mode 100644 new-docs/en/sql-manual/sql-functions/table-functions/explode-bitmap.md delete mode 100644 new-docs/en/sql-manual/sql-functions/table-functions/explode-json-array.md delete mode 100644 new-docs/en/sql-manual/sql-functions/table-functions/explode-split.md delete mode 100644 new-docs/en/sql-manual/sql-functions/window-function.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/BIGINT.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/BITMAP.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/BOOLEAN.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/CHAR.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/DATE.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/DATETIME.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/DECIMAL.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/DOUBLE.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/FLOAT.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/HLL.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/INT.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/QUANTILE_STATE.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/SMALLINT.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/STRING.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/TINYINT.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Data-Types/VARCHAR.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ENABLE-FEATURE.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CHARSET.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLLATION.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md delete mode 100644 
new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCEDURE.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-QUERY-PROFILE.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLLUP.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRIGGERS.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WARNING.md delete mode 100644 new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WHITE-LIST.md delete mode 100644 new-docs/en/summary/Doris System Architecture.md delete mode 100644 new-docs/package-lock.json delete mode 100644 new-docs/package.json delete mode 100644 new-docs/zh-CN/README.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/cancel-label.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/check-reset-rpc-cache.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/compaction-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/connection-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe-get-log-file.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/backends-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/bootstrap-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/cancel-load-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/check-decommission-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/check-storage-type-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/config-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/connection-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/get-ddl-stmt-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/get-load-info-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/get-load-state.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/get-log-file-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/get-small-file.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/ha-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/hardware-info-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/health-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/log-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/logout-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/manager/cluster-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/manager/node-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/manager/query-profile-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/meta-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/meta-info-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/meta-replay-state-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/profile-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/query-detail-action.md delete mode 100644 
new-docs/zh-CN/admin-manual/http-actions/fe/query-profile-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/row-count-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/session-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/set-config-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/show-data-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/show-meta-info-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/show-proc-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/show-runtime-info-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/statement-execution-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/system-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/table-query-plan-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/table-row-count-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/table-schema-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/fe/upload-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/get-load-state.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/get-tablets.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/profile-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/query-detail-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/restore-tablet.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/show-data-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/tablet-migration-action.md delete mode 100644 new-docs/zh-CN/admin-manual/http-actions/tablets_distribution.md delete mode 100644 new-docs/zh-CN/admin-manual/maint-monitor/monitor-alert.md delete mode 100644 new-docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/be-metrics.md delete mode 100644 new-docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md delete mode 100644 new-docs/zh-CN/admin-manual/maint-monitor/tablet-meta-tool.md delete mode 100644 new-docs/zh-CN/admin-manual/maint-monitor/tablet-restore-tool.md delete mode 100644 new-docs/zh-CN/admin-manual/query-profile.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/approx_count_distinct.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/avg.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/bitmap_union.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/count.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/group_concat.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/hll_union_agg.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/max.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/min.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/percentile.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/percentile_approx.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/stddev.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/stddev_samp.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/sum.md delete mode 100644 
new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/topn.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/var_samp.md delete mode 100755 new-docs/zh-CN/sql-manual/sql-functions/aggregate-functions/variance.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and_count.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and_not.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and_not_count.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_contains.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_empty.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_from_string.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_has_all.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_has_any.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_hash.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_intersect.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_max.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_min.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_not.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_or.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_or_count.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_subset_in_range.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_subset_limit.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_to_string.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_union.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_xor.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_xor_count.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/sub_bitmap.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitmap-functions/to_bitmap.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitand.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitnot.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitor.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitxor.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/cast.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/conditional-functions/case.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/conditional-functions/coalesce.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/conditional-functions/if.md delete mode 100644 
new-docs/zh-CN/sql-manual/sql-functions/conditional-functions/ifnull.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/conditional-functions/nullif.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/convert_tz.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/curdate.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/current_timestamp.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/curtime.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/date_add.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/date_format.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/date_sub.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/datediff.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/day.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayname.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayofmonth.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayofweek.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayofyear.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/from_days.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/from_unixtime.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/hour.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/makedate.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/minute.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/month.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/monthname.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/now.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/second.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/str_to_date.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/time_round.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/timediff.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/timestampadd.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/timestampdiff.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/to_date.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/to_days.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/unix_timestamp.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/utc_timestamp.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/week.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/weekday.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/weekofyear.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/year.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/date-time-functions/yearweek.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/digital-masking.md delete mode 100644 
new-docs/zh-CN/sql-manual/sql-functions/hash-functions/murmur_hash3_32.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/json-functions/get_json_double.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/json-functions/get_json_int.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/json-functions/get_json_string.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/json-functions/json_array.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/json-functions/json_object.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/json-functions/json_quote.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/math-functions/conv.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/math-functions/pmod.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_astext.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_circle.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_contains.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_distance_sphere.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_geometryfromtext.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_linefromtext.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_point.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_polygon.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_x.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_y.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/append_trailing_char_if_absent.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/ascii.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/bit_length.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/char_length.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/concat.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/concat_ws.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/ends_with.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/find_in_set.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/hex.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/instr.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/lcase.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/left.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/length.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/like/like.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/like/not_like.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/locate.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/lower.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/lpad.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/ltrim.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/money_format.md delete mode 100644 
new-docs/zh-CN/sql-manual/sql-functions/string-functions/null_or_empty.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/not_regexp.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/regexp.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/regexp_replace.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/repeat.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/replace.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/reverse.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/right.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/rpad.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/split_part.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/starts_with.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/strleft.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/strright.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/substring.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/string-functions/unhex.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/table-functions/explode-bitmap.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/table-functions/explode-json-array.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/table-functions/explode-numbers.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/table-functions/explode-split.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-functions/window-function.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/BIGINT.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/BITMAP.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/BOOLEAN.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/CHAR.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DATE.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DATETIME.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DECIMAL.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DOUBLE.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/FLOAT.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/HLL.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/INT.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/LARGEINT.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/QUANTILE_STATE.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/SMALLINT.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/STRING.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/TINYINT.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/VARCHAR.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ENABLE-FEATURE.md delete mode 100644 
new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CHARSET.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLLATION.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCEDURE.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-QUERY-PROFILE.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLLUP.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRIGGERS.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-WARNING.md delete mode 100644 new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-WHITE-LIST.md diff --git a/docs/.markdownlintignore b/docs/.markdownlintignore deleted file mode 100644 index 1aea57f9f2..0000000000 --- a/docs/.markdownlintignore +++ /dev/null @@ -1,2 +0,0 @@ -node_modules -.vuepress diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index 5f960c8f3b..46c3afb7a6 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -17,7 +17,7 @@ */ module.exports = [ - { + { title: "Downloads", directoryPath: "downloads/", initialOpenGroupIndex: -1, @@ -25,69 +25,854 @@ module.exports = [ sidebarDepth: 1, }, { - title: "Compilation and Deployment", - directoryPath: "installing/", + title: "Getting Started", + directoryPath: "get-starting/", initialOpenGroupIndex: -1, children: [ - "compilation", - "compilation-with-ldb-toolchain", - "compilation-arm", - "install-deploy", - "upgrade", + "get-starting" ], }, { - title: "Getting Started", - directoryPath: "getting-started/", + title: "Doris Architecture", + directoryPath: "summary/", initialOpenGroupIndex: -1, children: [ + "basic-summary", + "system-architecture" + ], + }, + { + title: "Install and deploy", + directoryPath: "install/", + initialOpenGroupIndex: -1, + children: [ + "install-deploy", + { + title: "Compile", + directoryPath: "source-install/", + initialOpenGroupIndex: -1, + children: [ + "compilation", + "compilation-with-ldb-toolchain", + "compilation-arm" + ], + sidebarDepth: 2, + }, + ] + }, + { + title: "Table Design", + directoryPath: "data-table/", + initialOpenGroupIndex: -1, + children: [ + "data-model", + "data-partition", "basic-usage", "advance-usage", - "data-model-rollup", - "data-partition", "hit-the-rollup", "best-practice", + { + title: "Index", + directoryPath: "index/", + initialOpenGroupIndex: -1, + children: [ + "bloomfilter", + "prefix-index", + "bitmap-index" + ], + }, ], }, { - title: "Administrator Guide", - directoryPath: "administrator-guide/", + title: "Data Operate", + directoryPath: "data-operate/", initialOpenGroupIndex: -1, children: [ { - title: "Load Data", - directoryPath: "load-data/", + title: "Import", + directoryPath: "import/", initialOpenGroupIndex: -1, children: [ "load-manual", - "batch-delete-manual", - "binlog-load-manual", - "broker-load-manual", - "routine-load-manual", - "sequence-column-manual", - "spark-load-manual", - "stream-load-manual", - "s3-load-manual", - "delete-manual", - "insert-into-manual", - "load-json-format", + { + title: "Import Scenes", + directoryPath: "import-scenes/", + initialOpenGroupIndex: -1, + children: [ + "local-file-load", + "external-storage-load", + "kafka-load", + "external-table-load", + "jdbc-load", + "load-atomicity", + "load-data-convert", + "load-strict-mode", + ], + }, + { + title: "Import Way", + directoryPath: "import-way/", + initialOpenGroupIndex: -1, + 
children: [ + "binlog-load-manual", + "broker-load-manual", + "routine-load-manual", + "spark-load-manual", + "stream-load-manual", + "s3-load-manual", + "insert-into-manual", + "load-json-format", + ], + }, ], - sidebarDepth: 2, }, { - title: "Schema Change", + title: "Export", + directoryPath: "export/", + initialOpenGroupIndex: -1, + children: [ + "export-manual", + "outfile", + "export_with_mysql_dump", + ], + }, + { + title: "Update and Delete", + directoryPath: "update-delete/", + initialOpenGroupIndex: -1, + children: [ + "update", + "sequence-column-manual", + "delete-manual", + "batch-delete-manual" + ], + }, + ], + }, + { + title: "Advanced usage", + directoryPath: "advanced/", + initialOpenGroupIndex: -1, + children: [ + "materialized-view", + { + title: "Alter Table", directoryPath: "alter-table/", initialOpenGroupIndex: -1, children: [ - "alter-table-bitmap-index", - "alter-table-replace-table", - "alter-table-rollup", - "alter-table-schema-change", - "alter-table-temp-partition", + "schema-change", + "replace-table" ], - sidebarDepth: 2, }, - "materialized_view", + { + title: "Doris Partition", + directoryPath: "partition/", + initialOpenGroupIndex: -1, + children: [ + "dynamic-partition", + "table-temp-partition" + ], + }, + { + title: "Join Optimization", + directoryPath: "join-optimization/", + initialOpenGroupIndex: -1, + children: [ + "bucket-shuffle-join", + "colocation-join", + "runtime-filter" + ], + }, + { + title: "Date Cache", + directoryPath: "cache/", + initialOpenGroupIndex: -1, + children: [ + "partition-cache" + ], + }, + "vectorized-execution-engine", + "broker", + "resource", + "orthogonal-bitmap-manual", + "variables", + "time-zone", + "small-file-mgr", + { + title: "Best Practice", + directoryPath: "best-practice/", + initialOpenGroupIndex: -1, + children: [ + "query-analysis", + "import-analysis", + "debug-log" + ] + } + ], + }, + { + title: "Ecosystem", + directoryPath: "ecosystem/", + initialOpenGroupIndex: -1, + children: [ + { + title: "Expansion table", + directoryPath: "external-table/", + initialOpenGroupIndex: -1, + children: [ + "doris-on-es", + "odbc-of-doris", + "hive-of-doris", + "iceberg-of-doris" + ], + }, + "audit-plugin", + "flink-doris-connector", + "spark-doris-connector", + "datax", + "logstash", + { + title: "Doris Manager", + directoryPath: "doris-manager/", + initialOpenGroupIndex: -1, + children: [ + "compiling-deploying", + "initializing", + "cluster-managenent", + "space-list", + "space-management", + "system-settings" + ], + }, + { + title: "SeaTunnel", + directoryPath: "seatunnel/", + initialOpenGroupIndex: -1, + children: [ + "flink-sink", + "spark-sink" + ], + }, + { + title: "UDF", + directoryPath: "udf/", + initialOpenGroupIndex: -1, + children: [ + "native-user-defined-function", + "remote-user-defined-function", + "contribute-udf" + ], + }, + ], + }, + { + title: "SQL manual", + directoryPath: "sql-manual/", + initialOpenGroupIndex: -1, + children: [ + { + title: "SQL Functions", + directoryPath: "sql-functions/", + initialOpenGroupIndex: -1, + children: [ + { + title: "Date Functions", + directoryPath: "date-time-functions/", + initialOpenGroupIndex: -1, + children: [ + "convert_tz", + "curdate", + "current_timestamp", + "curtime", + "date_add", + "date_format", + "date_sub", + "datediff", + "day", + "dayname", + "dayofmonth", + "dayofweek", + "dayofyear", + "from_days", + "from_unixtime", + "hour", + "makedate", + "minute", + "month", + "monthname", + "now", + "second", + "str_to_date", + "time_round", + "timediff", + 
"timestampadd", + "timestampdiff", + "to_days", + "unix_timestamp", + "utc_timestamp", + "week", + "weekday", + "weekofyear", + "year", + "yearweek", + ], + }, + { + title: "GIS Functions", + directoryPath: "spatial-functions/", + initialOpenGroupIndex: -1, + children: [ + "st_astext", + "st_circle", + "st_contains", + "st_distance_sphere", + "st_geometryfromtext", + "st_linefromtext", + "st_point", + "st_polygon", + "st_x", + "st_y", + ], + }, + { + title: "String Functions", + directoryPath: "string-functions/", + initialOpenGroupIndex: -1, + children: [ + "append_trailing_char_if_absent", + "ascii", + "bit_length", + "char_length", + "concat", + "concat_ws", + "ends_with", + "find_in_set", + "hex", + "instr", + "lcase", + "left", + "length", + "locate", + "lower", + "lpad", + "ltrim", + "money_format", + "null_or_empty", + "repeat", + "replace", + "reverse", + "right", + "rpad", + "split_part", + "starts_with", + "strleft", + "strright", + "substring", + "unhex", + { + title: "Fuzzy Match", + directoryPath: "like/", + initialOpenGroupIndex: -1, + children: [ + "like", + "not_like", + ], + }, + { + title: "Regular Match", + directoryPath: "regexp/", + initialOpenGroupIndex: -1, + children: [ + "regexp", + "regexp_extract", + "regexp_replace", + "not_regexp", + ], + }, + ], + }, + { + title: "Aggregate Functions", + directoryPath: "aggregate-functions/", + initialOpenGroupIndex: -1, + children: [ + "approx_count_distinct", + "avg", + "bitmap_union", + "count", + "group_concat", + "hll_union_agg", + "max", + "max_by", + "min", + "min_by", + "percentile", + "percentile_approx", + "stddev", + "stddev_samp", + "sum", + "topn", + "var_samp", + "variance", + ], + }, + { + title: "Bitmap Functions", + directoryPath: "bitmap-functions/", + initialOpenGroupIndex: -1, + children: [ + "bitmap_and", + "bitmap_contains", + "bitmap_empty", + "bitmap_from_string", + "bitmap_has_any", + "bitmap_has_all", + "bitmap_hash", + "bitmap_intersect", + "bitmap_or", + "bitmap_and_count", + "bitmap_or_count", + "bitmap_xor", + "bitmap_xor_count", + "bitmap_not", + "bitmap_and_not", + "bitmap_and_not_count", + "bitmap_subset_in_range", + "bitmap_subset_limit", + "sub_bitmap", + "bitmap_to_string", + "bitmap_union", + "bitmap_xor", + "to_bitmap", + "bitmap_max", + "orthogonal_bitmap_intersect", + "orthogonal_bitmap_intersect_count", + "orthogonal_bitmap_union_count", + ], + }, + { + title: "Bitwise Functions", + directoryPath: "bitwise-functions/", + initialOpenGroupIndex: -1, + children: [ + "bitand", + "bitor", + "bitxor", + "bitnot" + ], + }, + { + title: "Condition Functions", + directoryPath: "conditional-functions/", + children: [ + "case", + "coalesce", + "if", + "ifnull", + "nullif" + ], + }, + { + title: "JSON Functions", + directoryPath: "json-functions/", + initialOpenGroupIndex: -1, + children: [ + "get_json_double", + "get_json_int", + "get_json_string", + "json_array", + "json_object", + "json_quote", + ], + }, + { + title: "Hash Functions", + directoryPath: "hash-functions/", + initialOpenGroupIndex: -1, + children: ["murmur_hash3_32"], + }, + { + title: "Math Functions", + directoryPath: "math-functions/", + initialOpenGroupIndex: -1, + children: [ + "conv", + "pmod" + ], + }, + { + title: "Encryption Functions", + directoryPath: "encrypt-dixgest-functions/", + initialOpenGroupIndex: -1, + children: [ + "aes", + "md5", + "md5sum", + "sm4", + "sm3", + "sm3sum" + ], + }, + { + title: "Table Functions", + directoryPath: "table-functions/", + initialOpenGroupIndex: -1, + children: [ + "explode-bitmap", + 
"explode-split", + "explode-json-array", + "outer-combinator" + ], + }, + "window-function", + "cast", + "digital-masking", + ], + }, + { + title: "SQL Reference", + directoryPath: "sql-reference-v2/", + initialOpenGroupIndex: -1, + children: [ + { + title: "Account Management", + directoryPath: "Account-Management-Statements/", + initialOpenGroupIndex: -1, + children: [ + "CREATE-USER", + "CREATE-ROLE", + "DROP-ROLE", + "DROP-USER", + "GRANT", + "REVOKE", + "SET-PASSWORD", + "SET-PROPERTY", + "LDAP", + ], + }, + { + title: "Cluster management", + directoryPath: "Cluster-Management-Statements/", + initialOpenGroupIndex: -1, + children: [ + "ALTER-SYSTEM-ADD-BACKEND", + "ALTER-SYSTEM-ADD-FOLLOWER", + "ALTER-SYSTEM-ADD-OBSERVER", + "ALTER-SYSTEM-DECOMMISSION-BACKEND", + "ALTER-SYSTEM-DROP-BACKEND", + "ALTER-SYSTEM-DROP-FOLLOWER", + "ALTER-SYSTEM-DROP-OBSERVER", + "ALTER-SYSTEM-MODIFY-BROKER", + "CANCEL-ALTER-SYSTEM", + ], + }, + { + title: "DDL", + directoryPath: "Data-Definition-Statements/", + initialOpenGroupIndex: -1, + children: [ + { + title: "Alter", + directoryPath: "Alter/", + initialOpenGroupIndex: -1, + children: [ + "ALTER-DATABASE", + "ALTER-SQL-BLOCK-RULE", + "ALTER-TABLE-COLUMN", + "ALTER-TABLE-PARTITION", + "ALTER-TABLE-PROPERTY", + "ALTER-TABLE-RENAME", + "ALTER-TABLE-REPLACE", + "ALTER-TABLE-ROLLUP", + "ALTER-VIEW", + "CANCEL-ALTER-TABLE", + ], + }, + { + title: "Backup and Restore", + directoryPath: "Backup-and-Restore/", + initialOpenGroupIndex: -1, + children: [ + "BACKUP", + "CANCEL-BACKUP", + "CANCEL-RESTORE", + "CREATE-REPOSITORY", + "DROP-REPOSITORY", + "RESTORE", + ], + }, + { + title: "Create", + directoryPath: "Create/", + initialOpenGroupIndex: -1, + children: [ + "CREATE-DATABASE", + "CREATE-ENCRYPT-KEY", + "CREATE-FILE", + "CREATE-FUNCTION", + "CREATE-INDEX", + "CREATE-MATERIALIZED-VIEW", + "CREATE-RESOURCE", + "CREATE-SQL-BLOCK-RULE", + "CREATE-TABLE-LIKE", + "CREATE-TABLE", + "CREATE-VIEW", + "CREATE-EXTERNAL-TABLE", + ], + }, + { + title: "Drop", + directoryPath: "Drop/", + initialOpenGroupIndex: -1, + children: [ + "DROP-DATABASE", + "DROP-ENCRYPT-KEY", + "DROP-FILE", + "DROP-FUNCTION", + "DROP-INDEX", + "DROP-MATERIALIZED-VIEW", + "DROP-RESOURCE", + "DROP-SQL-BLOCK-RULE", + "DROP-TABLE", + "TRUNCATE-TABLE", + ], + }, + ], + }, + { + title: "DML", + directoryPath: "Data-Manipulation-Statements/", + initialOpenGroupIndex: -1, + children: [ + { + title: "Load", + directoryPath: "Load/", + initialOpenGroupIndex: -1, + children: [ + "ALTER-ROUTINE-LOAD", + "BROKER-LOAD", + "CANCEL-LOAD", + "CREATE-ROUTINE-LOAD", + "PAUSE-ROUTINE-LOAD", + "RESUME-ROUTINE-LOAD", + "STOP-ROUTINE-LOAD", + "STREAM-LOAD", + "PAUSE-SYNC-JOB", + "RESUME-SYNC-JOB", + "STOP-SYNC-JOB", + "CREATE-SYNC-JOB", + ], + }, + { + title: "Manipulation", + directoryPath: "Manipulation/", + initialOpenGroupIndex: -1, + children: [ + "DELETE", + "INSERT", + "UPDATE", + ], + }, + "OUTFILE" + ], + }, + { + title: "Database Administration", + directoryPath: "Database-Administration-Statements/", + initialOpenGroupIndex: -1, + children: [ + "ADMIN-CANCEL-REPAIR", + "ADMIN-CHECK-TABLET", + "ADMIN-REPAIR-TABLE", + "ADMIN-SET-CONFIG", + "ADMIN-SET-REPLICA-STATUS", + "ADMIN-SHOW-CONFIG", + "ADMIN-SHOW-REPLICA-DISTRIBUTION", + "ADMIN-SHOW-REPLICA-STATUS", + "ENABLE-FEATURE", + "INSTALL-PLUGIN", + "KILL", + "RECOVER", + "SET-VARIABLE", + "UNINSTALL-PLUGIN", + ], + }, + { + title: "Show", + directoryPath: "Show-Statements/", + initialOpenGroupIndex: -1, + children: [ + "SHOW-ALTER", + "SHOW-BACKENDS", + 
"SHOW-BACKUP", + "SHOW-BROKER", + "SHOW-CHARSET", + "SHOW-COLLATION", + "SHOW-COLUMNS", + "SHOW-CREATE-DATABASE", + "SHOW-CREATE-FUNCTION", + "SHOW-CREATE-ROUTINE-LOAD", + "SHOW-CREATE-TABLE", + "SHOW-DATA", + "SHOW-DATABASE-ID", + "SHOW-DATABASES", + "SHOW-DELETE", + "SHOW-DYNAMIC-PARTITION", + "SHOW-ENCRYPT-KEY", + "SHOW-ENGINES", + "SHOW-EVENTS", + "SHOW-EXPORT", + "SHOW-FRONTENDS", + "SHOW-FUNCTIONS", + "SHOW-GRANTS", + "SHOW-INDEX", + "SHOW-LOAD-PROFILE", + "SHOW-LOAD-WARNINGS", + "SHOW-LOAD", + "SHOW-MIGRATIONS", + "SHOW-OPEN-TABLES", + "SHOW-PARTITION-ID", + "SHOW-PARTITIONS", + "SHOW-PLUGINS", + "SHOW-PROC", + "SHOW-PROCEDURE", + "SHOW-PROCESSLIST", + "SHOW-PROPERTY", + "SHOW-QUERY-PROFILE", + "SHOW-REPOSITORIES", + "SHOW-RESOURCES", + "SHOW-RESTORE", + "SHOW-ROLES", + "SHOW-ROLLUP", + "SHOW-ROUTINE-LOAD-TASK", + "SHOW-ROUTINE-LOAD", + "SHOW-SMALL-FILES", + "SHOW-SNAPSHOT", + "SHOW-SQL-BLOCK-RULE", + "SHOW-STATUS", + "SHOW-STREAM-LOAD", + "SHOW-SYNC-JOB", + "SHOW-TABLE-ID", + "SHOW-TABLE-STATUS", + "SHOW-TABLET", + "SHOW-TRANSACTION", + "SHOW-TRIGGERS", + "SHOW-TRASH", + "SHOW-USER", + "SHOW-VARIABLES", + "SHOW-VIEW", + "SHOW-WARNING", + "SHOW-WHITE-LIST", + ], + }, + { + title: "Data Types", + directoryPath: "Data-Types/", + initialOpenGroupIndex: -1, + children: [ + "BIGINT", + "BITMAP", + "BOOLEAN", + "CHAR", + "DATE", + "DATETIME", + "DECIMAL", + "DOUBLE", + "FLOAT", + "HLL", + "INT", + "SMALLINT", + "STRING", + "TINYINT", + "VARCHAR", + ], + }, + { + title: "Utility", + directoryPath: "Utility-Statements/", + initialOpenGroupIndex: -1, + children: [ + "DESCRIBE", + "HELP", + "USE", + ], + }, + ], + }, + ], + }, + { + title: "Admin Manual", + directoryPath: "admin-manual/", + initialOpenGroupIndex: -1, + children: [ + { + title: "cluster management", + directoryPath: "cluster-management/", + initialOpenGroupIndex: -1, + children: [ + "upgrade", + "elastic-expansion", + "load-balancing" + ], + }, + { + title: "Data Admin", + directoryPath: "data-admin/", + initialOpenGroupIndex: -1, + children: [ + "backup", + "restore", + "delete-recover" + ], + }, + "sql-interception", + "query-profile", + "optimization", + { + title: "Maintenance and Monitor", + directoryPath: "maint-monitor/", + initialOpenGroupIndex: -1, + children: [ + { + title: "Monitor Metrics", + directoryPath: "monitor-metrics/", + initialOpenGroupIndex: -1, + children: [ + "fe-metrics", + "be-metrics" + ], + }, + "disk-capacity", + "metadata-operation", + "tablet-meta-tool", + "tablet-repair-and-balance", + "tablet-restore-tool", + "monitor-alert", + "doris-error-code", + "be-olap-error-code" + ], + }, + { + title: "Config", + directoryPath: "config/", + initialOpenGroupIndex: -1, + children: [ + "fe-config", + "be-config", + "user-property" + ], + }, + { + title: "User Privilege and Ldap", + directoryPath: "privilege-ldap/", + initialOpenGroupIndex: -1, + children: [ + "user-privilege", + "ldap" + ], + }, + "multi-tenant", { title: "HTTP API", directoryPath: "http-actions/", @@ -162,597 +947,28 @@ module.exports = [ ], sidebarDepth: 1, }, - { - title: "Maintainence Operation", - directoryPath: "operation/", - initialOpenGroupIndex: -1, - children: [ - "doris-error-code", - "be-olap-error-code", - "disk-capacity", - "metadata-operation", - "monitor-alert", - "multi-tenant", - "tablet-meta-tool", - "tablet-repair-and-balance", - "tablet-restore-tool", - { - title: "Metrics", - directoryPath: "monitor-metrics/", - initialOpenGroupIndex: -1, - children: [ - "be-metrics", - "fe-metrics", - ], - }, - ], - sidebarDepth: 2, 
- }, - { - title: "Configuration", - directoryPath: "config/", - initialOpenGroupIndex: -1, - children: [ - "be_config", - "fe_config", - "user_property", - ], - sidebarDepth: 1, - }, - { - title: "Block Rule", - directoryPath: "block-rule/", - initialOpenGroupIndex: -1, - children: [ - "sql-block", - ], - sidebarDepth: 1, - }, - "backup-restore", - "bloomfilter", - "broker", - "colocation-join", - "bucket-shuffle-join", - "vectorized-execution-engine", - "dynamic-partition", - "export-manual", - "export_with_mysql_dump", - "outfile", - "privilege", - "ldap", - "resource-management", - "running-profile", - "runtime-filter", - "small-file-mgr", - "sql-mode", - "time-zone", - "variables", - "update", - "multi-tenant", - "orthogonal-bitmap-manual", ], - sidebarDepth: 1, }, { - title: "Benchmark & Sample", + title: "FAQ", + directoryPath: "faq/", + initialOpenGroupIndex: -1, + children: [ + "install-faq", + "data-faq", + "sql-faq" + ], + }, + { + title: "Benchmark", directoryPath: "benchmark/", initialOpenGroupIndex: -1, children: [ - "star-schema-benchmark", - "systemd", - "samples" + "ssb", + "tpc-h" ], }, - { - title: "Extending Ability", - directoryPath: "extending-doris/", - initialOpenGroupIndex: -1, - children: [ - "audit-plugin", - "doris-on-es", - "logstash", - "odbc-of-doris", - "hive-of-doris", - "iceberg-of-doris", - "plugin-development-manual", - "spark-doris-connector", - "flink-doris-connector", - "datax", - { - title: "Seatunnel", - directoryPath: "seatunnel/", - initialOpenGroupIndex: -1, - children: [ - "spark-sink", - "flink-sink", - ], - }, - { - title: "UDF", - directoryPath: "udf/", - initialOpenGroupIndex: -1, - children: [ - "contribute-udf", - "native-user-defined-function", - "remote-user-defined-function", - "java-user-defined-function", - ], - }, - ], - }, - { - title: "Design Documents", - directoryPath: "internal/", - initialOpenGroupIndex: -1, - children: [ - "doris_storage_optimization", - "grouping_sets_design", - "metadata-design", - ], - }, - { - title: "SQL Manual", - directoryPath: "sql-reference/", - initialOpenGroupIndex: -1, - children: [ - { - title: "SQL Functions", - directoryPath: "sql-functions/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Date Time Functions", - directoryPath: "date-time-functions/", - initialOpenGroupIndex: -1, - children: [ - "convert_tz", - "curdate", - "current_timestamp", - "curtime", - "date_add", - "date_format", - "date_sub", - "datediff", - "day", - "dayname", - "dayofmonth", - "dayofweek", - "dayofyear", - "from_days", - "from_unixtime", - "hour", - "makedate", - "minute", - "month", - "monthname", - "now", - "second", - "str_to_date", - "time_round", - "timediff", - "timestampadd", - "timestampdiff", - "to_days", - "unix_timestamp", - "utc_timestamp", - "week", - "weekday", - "weekofyear", - "year", - "yearweek", - ], - }, - { - title: "Sptial Functions", - directoryPath: "spatial-functions/", - initialOpenGroupIndex: -1, - children: [ - "st_astext", - "st_circle", - "st_contains", - "st_distance_sphere", - "st_geometryfromtext", - "st_linefromtext", - "st_point", - "st_polygon", - "st_x", - "st_y", - ], - }, - { - title: "String Functions", - directoryPath: "string-functions/", - initialOpenGroupIndex: -1, - children: [ - "append_trailing_char_if_absent", - "ascii", - "bit_length", - "char_length", - "concat", - "concat_ws", - "ends_with", - "find_in_set", - "hex", - "instr", - "lcase", - "left", - "length", - "locate", - "lower", - "lpad", - "ltrim", - "money_format", - "null_or_empty", - "repeat", - 
"replace", - "reverse", - "right", - "rpad", - "split_part", - "starts_with", - "strleft", - "strright", - "substring", - "unhex", - { - title: "fuzzy match", - directoryPath: "like/", - initialOpenGroupIndex: -1, - children: [ - "like", - "not_like", - ], - }, - { - title: "regular match", - directoryPath: "regexp/", - initialOpenGroupIndex: -1, - children: [ - "regexp", - "regexp_extract", - "regexp_replace", - "not_regexp", - ], - }, - ], - }, - { - title: "Aggregate Functions", - directoryPath: "aggregate-functions/", - initialOpenGroupIndex: -1, - children: [ - "approx_count_distinct", - "avg", - "bitmap_union", - "count", - "group_concat", - "hll_union_agg", - "max", - "min", - "percentile", - "percentile_approx", - "stddev", - "stddev_samp", - "sum", - "topn", - "var_samp", - "variance", - ], - }, - { - title: "bitmap functions", - directoryPath: "bitmap-functions/", - initialOpenGroupIndex: -1, - children: [ - "bitmap_and", - "bitmap_contains", - "bitmap_empty", - "bitmap_from_string", - "bitmap_has_any", - "bitmap_has_all", - "bitmap_hash", - "bitmap_intersect", - "bitmap_or", - "bitmap_and_count", - "bitmap_or_count", - "bitmap_xor", - "bitmap_xor_count", - "bitmap_not", - "bitmap_and_not", - "bitmap_and_not_count", - "bitmap_subset_in_range", - "bitmap_subset_limit", - "sub_bitmap", - "bitmap_to_string", - "bitmap_union", - "bitmap_xor", - "to_bitmap", - "bitmap_max", - "orthogonal_bitmap_intersect", - "orthogonal_bitmap_intersect_count", - "orthogonal_bitmap_union_count", - ], - }, - { - title: "bitwise function", - directoryPath: "bitwise-functions/", - initialOpenGroupIndex: -1, - children: [ - "bitand", - "bitor", - "bitxor", - "bitnot" - ], - }, - { - title: "conditional function", - directoryPath: "conditional-functions/", - children: [ - "case", - "coalesce", - "if", - "ifnull", - "nullif" - ], - }, - { - title: "json function", - directoryPath: "json-functions/", - initialOpenGroupIndex: -1, - children: [ - "get_json_double", - "get_json_int", - "get_json_string", - "json_array", - "json_object", - "json_quote", - ], - }, - { - title: "Encryption and Digest Functions", - directoryPath: "encrypt-digest-functions/", - initialOpenGroupIndex: -1, - children: [ - "aes", - "md5", - "md5sum", - "sm4", - "sm3", - "sm3sum" - ], - }, - { - title: "Hash Functions", - directoryPath: "hash-functions/", - initialOpenGroupIndex: -1, - children: ["murmur_hash3_32"], - }, - { - title: "Math Functions", - directoryPath: "math-functions/", - initialOpenGroupIndex: -1, - children: [ - "conv", - "pmod" - ], - }, - { - title: "table functions", - directoryPath: "table-functions/", - initialOpenGroupIndex: -1, - children: [ - "explode-bitmap", - "explode-split", - "explode-json-array", - "explode-numbers", - "outer-combinator" - ], - }, - "window-function", - "cast", - "digital-masking", - ], - }, - { - title: "SQL Statements", - directoryPath: "sql-statements/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Account Management", - directoryPath: "Account Management/", - initialOpenGroupIndex: -1, - children: [ - "CREATE ROLE", - "CREATE USER", - "DROP ROLE", - "DROP USER", - "GRANT", - "REVOKE", - "SET PASSWORD", - "SET PROPERTY", - "SHOW GRANTS", - "SHOW ROLES", - ], - }, - { - title: "Administration", - directoryPath: "Administration/", - initialOpenGroupIndex: -1, - children: [ - "ADMIN CANCEL REBALANCE DISK", - "ADMIN CANCEL REPAIR", - "ADMIN CLEAN TRASH", - "ADMIN CHECK TABLET", - "ADMIN COMPACT", - "ADMIN REBALANCE DISK", - "ADMIN REPAIR", - "ADMIN SET CONFIG", - "ADMIN SET 
REPLICA STATUS", - "ADMIN SHOW CONFIG", - "ADMIN SHOW REPLICA DISTRIBUTION", - "ADMIN SHOW REPLICA STATUS", - "ADMIN-DIAGNOSE-TABLET", - "ADMIN SHOW TABLET STORAGE FORMAT", - "ALTER CLUSTER", - "ALTER SYSTEM", - "CANCEL DECOMMISSION", - "CREATE CLUSTER", - "CREATE FILE", - "DROP CLUSTER", - "DROP FILE", - "ENTER", - "INSTALL PLUGIN", - "LINK DATABASE", - "MIGRATE DATABASE", - "SET LDAP_ADMIN_PASSWORD", - "SHOW BACKENDS", - "SHOW BROKER", - "SHOW FILE", - "SHOW FRONTENDS", - "SHOW FULL COLUMNS", - "SHOW INDEX", - "SHOW MIGRATIONS", - "SHOW PLUGINS", - "SHOW TABLE STATUS", - "SHOW TRASH", - "UNINSTALL PLUGIN", - ], - }, - { - title: "Data Definition", - directoryPath: "Data Definition/", - initialOpenGroupIndex: -1, - children: [ - "ALTER DATABASE", - "ALTER RESOURCE", - "ALTER TABLE", - "ALTER VIEW", - "BACKUP", - "CANCEL ALTER", - "CANCEL BACKUP", - "CREATE ENCRYPTKEY", - "CANCEL RESTORE", - "CREATE DATABASE", - "CREATE INDEX", - "CREATE MATERIALIZED VIEW", - "CREATE REPOSITORY", - "CREATE RESOURCE", - "CREATE TABLE LIKE", - "CREATE TABLE", - "CREATE VIEW", - "Colocate Join", - "DROP DATABASE", - "DROP ENCRYPTKEY", - "DROP INDEX", - "DROP MATERIALIZED VIEW", - "DROP REPOSITORY", - "DROP RESOURCE", - "DROP TABLE", - "DROP VIEW", - "HLL", - "RECOVER", - "REFRESH DATABASE", - "REFRESH TABLE", - "RESTORE", - "SHOW ENCRYPTKEYS", - "SHOW RESOURCES", - "TRUNCATE TABLE", - "create-function", - "drop-function", - "show-functions", - ], - }, - { - title: "Data Manipulation", - directoryPath: "Data Manipulation/", - initialOpenGroupIndex: -1, - children: [ - "BEGIN", - "BROKER LOAD", - "CANCEL DELETE", - "CANCEL LABEL", - "CANCEL LOAD", - "CREATE SYNC JOB", - "DELETE", - "EXPORT", - "GET LABEL STATE", - "GROUP BY", - "LOAD", - "MINI LOAD", - "MULTI LOAD", - "PAUSE ROUTINE LOAD", - "PAUSE SYNC JOB", - "RESTORE TABLET", - "RESUME ROUTINE LOAD", - "RESUME SYNC JOB", - "ROUTINE LOAD", - "SHOW ALTER", - "SHOW BACKUP", - "SHOW CREATE FUNCTION", - "SHOW CREATE ROUTINE LOAD", - "SHOW DATA", - "SHOW DATA SKEW", - "SHOW DATABASES", - "SHOW DELETE", - "SHOW DYNAMIC PARTITION TABLES", - "SHOW EXPORT", - "SHOW LOAD", - "SHOW PARTITIONS", - "SHOW PROPERTY", - "SHOW REPOSITORIES", - "SHOW RESTORE", - "SHOW ROUTINE LOAD TASK", - "SHOW ROUTINE LOAD", - "SHOW SNAPSHOT", - "SHOW SYNC JOB", - "SHOW TABLES", - "SHOW TABLE CREATION", - "SHOW TABLET", - "SHOW TRANSACTION", - "SHOW-LAST-INSERT", - "STOP ROUTINE LOAD", - "STOP SYNC JOB", - "STREAM LOAD", - "alter-routine-load", - "insert", - "UPDATE", - "lateral-view", - ], - }, - { - title: "Data Types", - directoryPath: "Data Types/", - initialOpenGroupIndex: -1, - children: [ - "BIGINT", - "BITMAP", - "BOOLEAN", - "CHAR", - "DATE", - "DATETIME", - "DECIMAL", - "DOUBLE", - "FLOAT", - "HLL", - "INT", - "SMALLINT", - "STRING", - "TINYINT", - "VARCHAR", - ], - }, - { - title: "Utility", - directoryPath: "Utility/", - initialOpenGroupIndex: -1, - children: ["util_stmt"], - }, - ], - }, - ], - }, - { + { title: "Doris User", directoryPath: "case-user/", initialOpenGroupIndex: -1, @@ -780,15 +996,6 @@ module.exports = [ "bitmap-hll-file-format", ], }, - { - title: "FAQ", - directoryPath: "faq/", - initialOpenGroupIndex: -1, - children: [ - "faq", - "error" - ], - }, { title: "Apache Community", directoryPath: "community/", @@ -817,6 +1024,7 @@ module.exports = [ "release-prepare", "release-doris-core", "release-doris-connectors", + "release-doris-manager", "release-complete", "release-verify", ], @@ -824,4 +1032,4 @@ module.exports = [ "security", ], }, -] +]; diff --git 
a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index 324b9a7809..0cbc7b0edb 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -25,288 +25,249 @@ module.exports = [ sidebarDepth: 1, }, { - title: "编译与部署", - directoryPath: "installing/", + title: "快速开始", + directoryPath: "get-starting/", initialOpenGroupIndex: -1, children: [ - "compilation", - "compilation-with-ldb-toolchain", - "compilation-arm", - "install-deploy", - "upgrade", + "get-starting" ], }, { - title: "开始使用", - directoryPath: "getting-started/", + title: "Doris架构", + directoryPath: "summary/", initialOpenGroupIndex: -1, children: [ + "basic-summary", + "system-architecture" + ], + }, + { + title: "安装部署", + directoryPath: "install/", + initialOpenGroupIndex: -1, + children: [ + "install-deploy", + { + title: "源码编译", + directoryPath: "source-install/", + initialOpenGroupIndex: -1, + children: [ + "compilation", + "compilation-with-ldb-toolchain", + "compilation-arm" + ], + sidebarDepth: 2, + }, + ] + }, + { + title: "数据表设计", + directoryPath: "data-table/", + initialOpenGroupIndex: -1, + children: [ + "data-model", + "data-partition", "basic-usage", "advance-usage", - "data-model-rollup", - "data-partition", "hit-the-rollup", + "best-practice", + { + title: "索引", + directoryPath: "index/", + initialOpenGroupIndex: -1, + children: [ + "bloomfilter", + "prefix-index", + "bitmap-index" + ], + }, ], }, { - title: "操作手册", - directoryPath: "administrator-guide/", + title: "数据操作", + directoryPath: "data-operate/", initialOpenGroupIndex: -1, children: [ { title: "数据导入", - directoryPath: "load-data/", + directoryPath: "import/", initialOpenGroupIndex: -1, children: [ "load-manual", - "batch-delete-manual", - "binlog-load-manual", - "broker-load-manual", - "routine-load-manual", - "sequence-column-manual", - "spark-load-manual", - "stream-load-manual", - "s3-load-manual", - "delete-manual", - "insert-into-manual", - "load-json-format", + { + title: "按场景导入", + directoryPath: "import-scenes/", + initialOpenGroupIndex: -1, + children: [ + "local-file-load", + "external-storage-load", + "kafka-load", + "external-table-load", + "jdbc-load", + "load-atomicity", + "load-data-convert", + "load-strict-mode", + ], + }, + { + title: "按方式导入", + directoryPath: "import-way/", + initialOpenGroupIndex: -1, + children: [ + "binlog-load-manual", + "broker-load-manual", + "routine-load-manual", + "spark-load-manual", + "stream-load-manual", + "s3-load-manual", + "insert-into-manual", + "load-json-format", + ], + }, ], - sidebarDepth: 2, }, + { + title: "Export导出", + directoryPath: "export/", + initialOpenGroupIndex: -1, + children: [ + "export-manual", + "outfile", + "export_with_mysql_dump", + ], + }, + { + title: "数据更新及删除", + directoryPath: "update-delete/", + initialOpenGroupIndex: -1, + children: [ + "update", + "sequence-column-manual", + "delete-manual", + "batch-delete-manual" + ], + }, + ], + }, + { + title: "进阶使用", + directoryPath: "advanced/", + initialOpenGroupIndex: -1, + children: [ + "materialized-view", { title: "表结构变更", directoryPath: "alter-table/", initialOpenGroupIndex: -1, children: [ - "alter-table-bitmap-index", - "alter-table-replace-table", - "alter-table-rollup", - "alter-table-schema-change", - "alter-table-temp-partition", + "schema-change", + "replace-table" ], - sidebarDepth: 2, - }, - "materialized_view", - { - title: "HTTP API", - directoryPath: "http-actions/", - initialOpenGroupIndex: -1, - children: [ - { - title: "FE", - directoryPath: "fe/", - initialOpenGroupIndex: 
-1, - children: [ - { - title: "MANAGER", - directoryPath: "manager/", - initialOpenGroupIndex: -1, - children: [ - "cluster-action", - "node-action", - "query-profile-action", - ], - }, - "backends-action", - "bootstrap-action", - "cancel-load-action", - "check-decommission-action", - "check-storage-type-action", - "config-action", - "connection-action", - "get-ddl-stmt-action", - "get-load-info-action", - "get-load-state", - "get-log-file-action", - "get-small-file", - "ha-action", - "hardware-info-action", - "health-action", - "log-action", - "logout-action", - "meta-action", - "meta-info-action", - "meta-replay-state-action", - "profile-action", - "query-detail-action", - "query-profile-action", - "row-count-action", - "session-action", - "set-config-action", - "show-data-action", - "show-meta-info-action", - "show-proc-action", - "show-runtime-info-action", - "statement-execution-action", - "system-action", - "table-query-plan-action", - "table-row-count-action", - "table-schema-action", - "upload-action", - ], - }, - "cancel-label", - "check-reset-rpc-cache", - "compaction-action", - "connection-action", - "fe-get-log-file", - "get-load-state", - "get-tablets", - "profile-action", - "query-detail-action", - "restore-tablet", - "show-data-action", - "tablet-migration-action", - "tablets_distribution", - ], - sidebarDepth: 1, }, { - title: "运维操作", - directoryPath: "operation/", + title: "Doris表分区", + directoryPath: "partition/", initialOpenGroupIndex: -1, children: [ - "doris-error-code", - "be-olap-error-code", - "disk-capacity", - "metadata-operation", - "monitor-alert", - "multi-tenant", - "tablet-meta-tool", - "tablet-repair-and-balance", - "tablet-restore-tool", - { - title: "监控项", - directoryPath: "monitor-metrics/", - initialOpenGroupIndex: -1, - children: [ - "be-metrics", - "fe-metrics", - ], - }, + "dynamic-partition", + "table-temp-partition" ], - sidebarDepth: 2, }, { - title: "配置文件", - directoryPath: "config/", + title: "Join优化", + directoryPath: "join-optimization/", initialOpenGroupIndex: -1, children: [ - "be_config", - "fe_config", - "user_property", + "bucket-shuffle-join", + "colocation-join", + "runtime-filter" ], - sidebarDepth: 1, }, { - title: "拦截规则", - directoryPath: "block-rule/", + title: "数据缓存", + directoryPath: "cache/", initialOpenGroupIndex: -1, children: [ - "sql-block", + "partition-cache" ], - sidebarDepth: 1, }, - "backup-restore", - "bloomfilter", - "broker", - "colocation-join", - "bucket-shuffle-join", "vectorized-execution-engine", - "dynamic-partition", - "export-manual", - "export_with_mysql_dump", - "outfile", - "partition_cache", - "privilege", - "ldap", - "resource-management", - "running-profile", - "runtime-filter", - "segment-v2-usage", - "small-file-mgr", - "sql-mode", - "time-zone", - "variables", - "update", - "multi-tenant", + "broker", + "resource", "orthogonal-bitmap-manual", - ], - sidebarDepth: 1, - }, - { - title: "性能测试及示例", - directoryPath: "benchmark/", - initialOpenGroupIndex: -1, - children: [ - "star-schema-benchmark", - "systemd", - "samples" + "variables", + "time-zone", + "small-file-mgr", + { + title: "最佳实践", + directoryPath: "best-practice/", + initialOpenGroupIndex: -1, + children: [ + "query-analysis", + "import-analysis", + "debug-log" + ], + } ], }, { - title: "扩展功能", - directoryPath: "extending-doris/", + title: "生态扩展", + directoryPath: "ecosystem/", initialOpenGroupIndex: -1, children: [ + { + title: "扩展表", + directoryPath: "external-table/", + initialOpenGroupIndex: -1, + children: [ + "doris-on-es", + 
"odbc-of-doris", + "hive-of-doris", + "iceberg-of-doris" + ], + }, "audit-plugin", - "doris-on-es", - "logstash", - "odbc-of-doris", - "hive-of-doris", - "iceberg-of-doris", - "plugin-development-manual", - "spark-doris-connector", "flink-doris-connector", + "spark-doris-connector", "datax", + "logstash", { title: "Doris Manager", directoryPath: "doris-manager/", + initialOpenGroupIndex: -1, children: [ "compiling-deploying", "initializing", - "space-list", - "system-settings", "cluster-managenent", - "space-management" + "space-list", + "space-management", + "system-settings" ], }, { - title: "Seatunnel", + title: "SeaTunnel", directoryPath: "seatunnel/", initialOpenGroupIndex: -1, children: [ - "spark-sink", "flink-sink", + "spark-sink" ], }, { - title: "UDF", + title: "自定义函数", directoryPath: "udf/", initialOpenGroupIndex: -1, children: [ - "contribute-udf", "native-user-defined-function", "remote-user-defined-function", - "java-user-defined-function", + "contribute-udf" ], }, ], }, { - title: "设计文档", - directoryPath: "internal/", - initialOpenGroupIndex: -1, - children: [ - "doris_storage_optimization", - "grouping_sets_design", - "metadata-design", - "spark_load", - ], - }, - { - title: "SQL 手册", - directoryPath: "sql-reference/", + title: "SQL手册", + directoryPath: "sql-manual/", initialOpenGroupIndex: -1, children: [ { @@ -442,7 +403,9 @@ module.exports = [ "group_concat", "hll_union_agg", "max", + "max_by", "min", + "min_by", "percentile", "percentile_approx", "stddev", @@ -539,7 +502,7 @@ module.exports = [ }, { title: "加密和信息摘要函数", - directoryPath: "encrypt-digest-functions/", + directoryPath: "encrypt-dixgest-functions/", initialOpenGroupIndex: -1, children: [ "aes", @@ -558,7 +521,6 @@ module.exports = [ "explode-bitmap", "explode-split", "explode-json-array", - "explode-numbers", "outer-combinator" ], }, @@ -568,171 +530,244 @@ module.exports = [ ], }, { - title: "语法帮助", - directoryPath: "sql-statements/", + title: "SQL手册", + directoryPath: "sql-reference-v2/", initialOpenGroupIndex: -1, children: [ { - title: "用户账户管理", - directoryPath: "Account Management/", + title: "账户管理", + directoryPath: "Account-Management-Statements/", initialOpenGroupIndex: -1, children: [ - "CREATE ROLE", - "CREATE USER", - "DROP ROLE", - "DROP USER", + "CREATE-USER", + "CREATE-ROLE", + "DROP-ROLE", + "DROP-USER", "GRANT", "REVOKE", - "SET PASSWORD", - "SET PROPERTY", - "SHOW GRANTS", - "SHOW ROLES", + "SET-PASSWORD", + "SET-PROPERTY", + "LDAP", ], }, { title: "集群管理", - directoryPath: "Administration/", + directoryPath: "Cluster-Management-Statements/", initialOpenGroupIndex: -1, children: [ - "ADMIN CANCEL REBALANCE DISK", - "ADMIN CANCEL REPAIR", - "ADMIN CLEAN TRASH", - "ADMIN CHECK TABLET", - "ADMIN COMPACT", - "ADMIN REBALANCE DISK", - "ADMIN REPAIR", - "ADMIN SET CONFIG", - "ADMIN SET REPLICA STATUS", - "ADMIN SHOW CONFIG", - "ADMIN SHOW REPLICA DISTRIBUTION", - "ADMIN SHOW REPLICA STATUS", - "ADMIN-DIAGNOSE-TABLET", - "ADMIN SHOW TABLET STORAGE FORMAT", - "ALTER CLUSTER", - "ALTER SYSTEM", - "CANCEL DECOMMISSION", - "CREATE CLUSTER", - "CREATE FILE", - "DROP CLUSTER", - "DROP FILE", - "ENTER", - "INSTALL PLUGIN", - "LINK DATABASE", - "MIGRATE DATABASE", - "SET LDAP_ADMIN_PASSWORD", - "SHOW BACKENDS", - "SHOW BROKER", - "SHOW FILE", - "SHOW FRONTENDS", - "SHOW FULL COLUMNS", - "SHOW INDEX", - "SHOW MIGRATIONS", - "SHOW PLUGINS", - "SHOW TABLE STATUS", - "SHOW TRASH", - "UNINSTALL PLUGIN", + "ALTER-SYSTEM-ADD-BACKEND", + "ALTER-SYSTEM-ADD-FOLLOWER", + "ALTER-SYSTEM-ADD-OBSERVER", + 
"ALTER-SYSTEM-DECOMMISSION-BACKEND", + "ALTER-SYSTEM-DROP-BACKEND", + "ALTER-SYSTEM-DROP-FOLLOWER", + "ALTER-SYSTEM-DROP-OBSERVER", + "ALTER-SYSTEM-MODIFY-BROKER", + "CANCEL-ALTER-SYSTEM", ], }, { title: "DDL", - directoryPath: "Data Definition/", + directoryPath: "Data-Definition-Statements/", initialOpenGroupIndex: -1, children: [ - "ALTER DATABASE", - "ALTER RESOURCE", - "ALTER TABLE", - "ALTER VIEW", - "BACKUP", - "CANCEL ALTER", - "CANCEL BACKUP", - "CANCEL RESTORE", - "CREATE DATABASE", - "CREATE ENCRYPTKEY", - "CREATE INDEX", - "CREATE MATERIALIZED VIEW", - "CREATE REPOSITORY", - "CREATE RESOURCE", - "CREATE TABLE LIKE", - "CREATE TABLE", - "CREATE VIEW", - "DROP DATABASE", - "DROP ENCRYPTKEY", - "DROP INDEX", - "DROP MATERIALIZED VIEW", - "DROP REPOSITORY", - "DROP RESOURCE", - "DROP TABLE", - "DROP VIEW", - "HLL", - "RECOVER", - "REFRESH DATABASE", - "REFRESH TABLE", - "RESTORE", - "SHOW ENCRYPTKEYS", - "SHOW RESOURCES", - "TRUNCATE TABLE", - "create-function", - "drop-function", - "show-functions", + { + title: "Alter", + directoryPath: "Alter/", + initialOpenGroupIndex: -1, + children: [ + "ALTER-DATABASE", + "ALTER-SQL-BLOCK-RULE", + "ALTER-TABLE-COLUMN", + "ALTER-TABLE-PARTITION", + "ALTER-TABLE-PROPERTY", + "ALTER-TABLE-RENAME", + "ALTER-TABLE-REPLACE", + "ALTER-TABLE-ROLLUP", + "ALTER-VIEW", + "CANCEL-ALTER-TABLE", + ], + }, + { + title: "备份及恢复", + directoryPath: "Backup-and-Restore/", + initialOpenGroupIndex: -1, + children: [ + "BACKUP", + "CANCEL-BACKUP", + "CANCEL-RESTORE", + "CREATE-REPOSITORY", + "DROP-REPOSITORY", + "RESTORE", + ], + }, + { + title: "Create", + directoryPath: "Create/", + initialOpenGroupIndex: -1, + children: [ + "CREATE-DATABASE", + "CREATE-ENCRYPT-KEY", + "CREATE-FILE", + "CREATE-FUNCTION", + "CREATE-INDEX", + "CREATE-MATERIALIZED-VIEW", + "CREATE-RESOURCE", + "CREATE-SQL-BLOCK-RULE", + "CREATE-TABLE-LIKE", + "CREATE-TABLE", + "CREATE-VIEW", + "CREATE-EXTERNAL-TABLE", + ], + }, + { + title: "Drop", + directoryPath: "Drop/", + initialOpenGroupIndex: -1, + children: [ + "DROP-DATABASE", + "DROP-ENCRYPT-KEY", + "DROP-FILE", + "DROP-FUNCTION", + "DROP-INDEX", + "DROP-MATERIALIZED-VIEW", + "DROP-RESOURCE", + "DROP-SQL-BLOCK-RULE", + "DROP-TABLE", + "TRUNCATE-TABLE", + ], + }, ], }, { title: "DML", - directoryPath: "Data Manipulation/", + directoryPath: "Data-Manipulation-Statements/", initialOpenGroupIndex: -1, children: [ - "BEGIN", - "BROKER LOAD", - "CANCEL LOAD", - "CREATE SYNC JOB", - "DELETE", - "EXPORT", - "GROUP BY", - "LOAD", - "MINI LOAD", - "MULTI LOAD", - "PAUSE ROUTINE LOAD", - "PAUSE SYNC JOB", - "RESUME ROUTINE LOAD", - "RESUME SYNC JOB", - "ROUTINE LOAD", - "SHOW ALTER", - "SHOW BACKUP", - "SHOW CREATE FUNCTION", - "SHOW CREATE ROUTINE LOAD", - "SHOW DATA", - "SHOW DATA SKEW", - "SHOW DATABASES", - "SHOW DELETE", - "SHOW DYNAMIC PARTITION TABLES", - "SHOW EXPORT", - "SHOW LOAD", - "SHOW PARTITIONS", - "SHOW PROPERTY", - "SHOW REPOSITORIES", - "SHOW RESTORE", - "SHOW ROUTINE LOAD TASK", - "SHOW ROUTINE LOAD", - "SHOW SNAPSHOT", - "SHOW SYNC JOB", - "SHOW TABLES", - "SHOW TABLE CREATION", - "SHOW TABLET", - "SHOW TRANSACTION", - "SHOW-LAST-INSERT", - "SPARK LOAD", - "STOP ROUTINE LOAD", - "STOP SYNC JOB", - "STREAM LOAD", - "alter-routine-load", - "insert", - "UPDATE", - "lateral-view", + { + title: "Load", + directoryPath: "Load/", + initialOpenGroupIndex: -1, + children: [ + "ALTER-ROUTINE-LOAD", + "BROKER-LOAD", + "CANCEL-LOAD", + "CREATE-ROUTINE-LOAD", + "PAUSE-ROUTINE-LOAD", + "RESUME-ROUTINE-LOAD", + "STOP-ROUTINE-LOAD", + 
"STREAM-LOAD", + "PAUSE-SYNC-JOB", + "RESUME-SYNC-JOB", + "STOP-SYNC-JOB", + "CREATE-SYNC-JOB", + ], + }, + { + title: "操作", + directoryPath: "Manipulation/", + initialOpenGroupIndex: -1, + children: [ + "DELETE", + "INSERT", + "UPDATE", + ], + }, + "OUTFILE" + ], + }, + { + title: "数据库管理", + directoryPath: "Database-Administration-Statements/", + initialOpenGroupIndex: -1, + children: [ + "ADMIN-CANCEL-REPAIR", + "ADMIN-CHECK-TABLET", + "ADMIN-REPAIR-TABLE", + "ADMIN-SET-CONFIG", + "ADMIN-SET-REPLICA-STATUS", + "ADMIN-SHOW-CONFIG", + "ADMIN-SHOW-REPLICA-DISTRIBUTION", + "ADMIN-SHOW-REPLICA-STATUS", + "ENABLE-FEATURE", + "INSTALL-PLUGIN", + "KILL", + "RECOVER", + "SET-VARIABLE", + "UNINSTALL-PLUGIN", + ], + }, + { + title: "Show", + directoryPath: "Show-Statements/", + initialOpenGroupIndex: -1, + children: [ + "SHOW-ALTER", + "SHOW-BACKENDS", + "SHOW-BACKUP", + "SHOW-BROKER", + "SHOW-CHARSET", + "SHOW-COLLATION", + "SHOW-COLUMNS", + "SHOW-CREATE-DATABASE", + "SHOW-CREATE-FUNCTION", + "SHOW-CREATE-ROUTINE-LOAD", + "SHOW-CREATE-TABLE", + "SHOW-DATA", + "SHOW-DATABASE-ID", + "SHOW-DATABASES", + "SHOW-DELETE", + "SHOW-DYNAMIC-PARTITION", + "SHOW-ENCRYPT-KEY", + "SHOW-ENGINES", + "SHOW-EVENTS", + "SHOW-EXPORT", + "SHOW-FRONTENDS", + "SHOW-FUNCTIONS", + "SHOW-GRANTS", + "SHOW-INDEX", + "SHOW-LOAD-PROFILE", + "SHOW-LOAD-WARNINGS", + "SHOW-LOAD", + "SHOW-MIGRATIONS", + "SHOW-OPEN-TABLES", + "SHOW-PARTITION-ID", + "SHOW-PARTITIONS", + "SHOW-PLUGINS", + "SHOW-PROC", + "SHOW-PROCEDURE", + "SHOW-PROCESSLIST", + "SHOW-PROPERTY", + "SHOW-QUERY-PROFILE", + "SHOW-REPOSITORIES", + "SHOW-RESOURCES", + "SHOW-RESTORE", + "SHOW-ROLES", + "SHOW-ROLLUP", + "SHOW-ROUTINE-LOAD-TASK", + "SHOW-ROUTINE-LOAD", + "SHOW-SMALL-FILES", + "SHOW-SNAPSHOT", + "SHOW-SQL-BLOCK-RULE", + "SHOW-STATUS", + "SHOW-STREAM-LOAD", + "SHOW-SYNC-JOB", + "SHOW-TABLE-ID", + "SHOW-TABLE-STATUS", + "SHOW-TABLET", + "SHOW-TRANSACTION", + "SHOW-TRIGGERS", + "SHOW-TRASH", + "SHOW-USER", + "SHOW-VARIABLES", + "SHOW-VIEW", + "SHOW-WARNING", + "SHOW-WHITE-LIST", ], }, { title: "数据类型", - directoryPath: "Data Types/", + directoryPath: "Data-Types/", initialOpenGroupIndex: -1, children: [ "BIGINT", @@ -746,7 +781,6 @@ module.exports = [ "FLOAT", "HLL", "INT", - "LARGEINT", "SMALLINT", "STRING", "TINYINT", @@ -755,14 +789,185 @@ module.exports = [ }, { title: "辅助命令", - directoryPath: "Utility/", + directoryPath: "Utility-Statements/", initialOpenGroupIndex: -1, - children: ["DESCRIBE"], + children: [ + "DESCRIBE", + "HELP", + "USE" + ], }, ], }, ], }, + { + title: "管理手册", + directoryPath: "admin-manual/", + initialOpenGroupIndex: -1, + children: [ + { + title: "集群管理", + directoryPath: "cluster-management/", + initialOpenGroupIndex: -1, + children: [ + "upgrade", + "elastic-expansion", + "load-balancing" + ], + }, + { + title: "数据管理", + directoryPath: "data-admin/", + initialOpenGroupIndex: -1, + children: [ + "backup", + "restore", + "delete-recover" + ], + }, + "sql-interception", + "query-profile", + "optimization", + { + title: "运维监控", + directoryPath: "maint-monitor/", + initialOpenGroupIndex: -1, + children: [ + { + title: "监控指标", + directoryPath: "monitor-metrics/", + initialOpenGroupIndex: -1, + children: [ + "fe-metrics", + "be-metrics" + ], + }, + "disk-capacity", + "metadata-operation", + "tablet-meta-tool", + "tablet-repair-and-balance", + "tablet-restore-tool", + "monitor-alert", + "doris-error-code", + "be-olap-error-code" + ], + }, + { + title: "配置管理", + directoryPath: "config/", + initialOpenGroupIndex: -1, + children: [ + "fe-config", 
+ "be-config", + "user-property" + ], + }, + { + title: "用户权限及认证", + directoryPath: "privilege-ldap/", + initialOpenGroupIndex: -1, + children: [ + "user-privilege", + "ldap" + ], + }, + "multi-tenant", + { + title: "HTTP API", + directoryPath: "http-actions/", + initialOpenGroupIndex: -1, + children: [ + { + title: "FE", + directoryPath: "fe/", + initialOpenGroupIndex: -1, + children: [ + { + title: "MANAGER", + directoryPath: "manager/", + initialOpenGroupIndex: -1, + children: [ + "cluster-action", + "node-action", + "query-profile-action", + ], + }, + "backends-action", + "bootstrap-action", + "cancel-load-action", + "check-decommission-action", + "check-storage-type-action", + "config-action", + "connection-action", + "get-ddl-stmt-action", + "get-load-info-action", + "get-load-state", + "get-log-file-action", + "get-small-file", + "ha-action", + "hardware-info-action", + "health-action", + "log-action", + "logout-action", + "meta-action", + "meta-info-action", + "meta-replay-state-action", + "profile-action", + "query-detail-action", + "query-profile-action", + "row-count-action", + "session-action", + "set-config-action", + "show-data-action", + "show-meta-info-action", + "show-proc-action", + "show-runtime-info-action", + "statement-execution-action", + "system-action", + "table-query-plan-action", + "table-row-count-action", + "table-schema-action", + "upload-action", + ], + }, + "cancel-label", + "check-reset-rpc-cache", + "compaction-action", + "connection-action", + "fe-get-log-file", + "get-load-state", + "get-tablets", + "profile-action", + "query-detail-action", + "restore-tablet", + "show-data-action", + "tablet-migration-action", + "tablets_distribution", + ], + sidebarDepth: 1, + }, + ], + }, + { + title: "常见问题", + directoryPath: "faq/", + initialOpenGroupIndex: -1, + children: [ + "install-faq", + "data-faq", + "sql-faq" + ], + }, + { + title: "性能测试", + directoryPath: "benchmark/", + initialOpenGroupIndex: -1, + children: [ + "ssb", + "tpc-h" + ], + }, { title: "Doris用户", directoryPath: "case-user/", @@ -792,15 +997,6 @@ module.exports = [ "regression-testing", ], }, - { - title: "FAQ 常见问题", - directoryPath: "faq/", - initialOpenGroupIndex: -1, - children: [ - "faq", - "error" - ], - }, { title: "Apache 社区", directoryPath: "community/", @@ -829,6 +1025,7 @@ module.exports = [ "release-prepare", "release-doris-core", "release-doris-connectors", + "release-doris-manager", "release-complete", "release-verify", ], diff --git a/docs/en/README.md b/docs/en/README.md index 40040ee870..d5ddba4ad6 100644 --- a/docs/en/README.md +++ b/docs/en/README.md @@ -89,7 +89,7 @@ cases: - logo: /images/home/logo-youdao.png alt: 有道 actionText: Quick Start → -actionLink: /en/installing/compilation +actionLink: /en/get-starting/get-starting articleText: Latest News articleLink: /en/article/article-list --- diff --git a/new-docs/en/admin-manual/cluster-management/elastic-expansion.md b/docs/en/admin-manual/cluster-management/elastic-expansion.md similarity index 100% rename from new-docs/en/admin-manual/cluster-management/elastic-expansion.md rename to docs/en/admin-manual/cluster-management/elastic-expansion.md diff --git a/new-docs/en/admin-manual/cluster-management/load-balancing.md b/docs/en/admin-manual/cluster-management/load-balancing.md similarity index 100% rename from new-docs/en/admin-manual/cluster-management/load-balancing.md rename to docs/en/admin-manual/cluster-management/load-balancing.md diff --git a/new-docs/en/admin-manual/cluster-management/upgrade.md 
b/docs/en/admin-manual/cluster-management/upgrade.md similarity index 100% rename from new-docs/en/admin-manual/cluster-management/upgrade.md rename to docs/en/admin-manual/cluster-management/upgrade.md diff --git a/new-docs/en/admin-manual/config/be-config.md b/docs/en/admin-manual/config/be-config.md similarity index 100% rename from new-docs/en/admin-manual/config/be-config.md rename to docs/en/admin-manual/config/be-config.md diff --git a/new-docs/en/admin-manual/config/fe-config.md b/docs/en/admin-manual/config/fe-config.md similarity index 100% rename from new-docs/en/admin-manual/config/fe-config.md rename to docs/en/admin-manual/config/fe-config.md diff --git a/new-docs/en/admin-manual/config/user-property.md b/docs/en/admin-manual/config/user-property.md similarity index 100% rename from new-docs/en/admin-manual/config/user-property.md rename to docs/en/admin-manual/config/user-property.md diff --git a/new-docs/en/admin-manual/data-admin/backup.md b/docs/en/admin-manual/data-admin/backup.md similarity index 100% rename from new-docs/en/admin-manual/data-admin/backup.md rename to docs/en/admin-manual/data-admin/backup.md diff --git a/new-docs/en/admin-manual/data-admin/delete-recover.md b/docs/en/admin-manual/data-admin/delete-recover.md similarity index 100% rename from new-docs/en/admin-manual/data-admin/delete-recover.md rename to docs/en/admin-manual/data-admin/delete-recover.md diff --git a/new-docs/en/admin-manual/data-admin/restore.md b/docs/en/admin-manual/data-admin/restore.md similarity index 100% rename from new-docs/en/admin-manual/data-admin/restore.md rename to docs/en/admin-manual/data-admin/restore.md diff --git a/docs/en/administrator-guide/http-actions/cancel-label.md b/docs/en/admin-manual/http-actions/cancel-label.md similarity index 100% rename from docs/en/administrator-guide/http-actions/cancel-label.md rename to docs/en/admin-manual/http-actions/cancel-label.md diff --git a/docs/en/administrator-guide/http-actions/check-reset-rpc-cache.md b/docs/en/admin-manual/http-actions/check-reset-rpc-cache.md similarity index 100% rename from docs/en/administrator-guide/http-actions/check-reset-rpc-cache.md rename to docs/en/admin-manual/http-actions/check-reset-rpc-cache.md diff --git a/docs/en/administrator-guide/http-actions/compaction-action.md b/docs/en/admin-manual/http-actions/compaction-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/compaction-action.md rename to docs/en/admin-manual/http-actions/compaction-action.md diff --git a/docs/en/administrator-guide/http-actions/connection-action.md b/docs/en/admin-manual/http-actions/connection-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/connection-action.md rename to docs/en/admin-manual/http-actions/connection-action.md diff --git a/docs/en/administrator-guide/http-actions/fe-get-log-file.md b/docs/en/admin-manual/http-actions/fe-get-log-file.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe-get-log-file.md rename to docs/en/admin-manual/http-actions/fe-get-log-file.md diff --git a/docs/en/administrator-guide/http-actions/fe/backends-action.md b/docs/en/admin-manual/http-actions/fe/backends-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/backends-action.md rename to docs/en/admin-manual/http-actions/fe/backends-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/bootstrap-action.md b/docs/en/admin-manual/http-actions/fe/bootstrap-action.md similarity 
index 100% rename from docs/en/administrator-guide/http-actions/fe/bootstrap-action.md rename to docs/en/admin-manual/http-actions/fe/bootstrap-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/cancel-load-action.md b/docs/en/admin-manual/http-actions/fe/cancel-load-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/cancel-load-action.md rename to docs/en/admin-manual/http-actions/fe/cancel-load-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/check-decommission-action.md b/docs/en/admin-manual/http-actions/fe/check-decommission-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/check-decommission-action.md rename to docs/en/admin-manual/http-actions/fe/check-decommission-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/check-storage-type-action.md b/docs/en/admin-manual/http-actions/fe/check-storage-type-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/check-storage-type-action.md rename to docs/en/admin-manual/http-actions/fe/check-storage-type-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/config-action.md b/docs/en/admin-manual/http-actions/fe/config-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/config-action.md rename to docs/en/admin-manual/http-actions/fe/config-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/connection-action.md b/docs/en/admin-manual/http-actions/fe/connection-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/connection-action.md rename to docs/en/admin-manual/http-actions/fe/connection-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/get-ddl-stmt-action.md b/docs/en/admin-manual/http-actions/fe/get-ddl-stmt-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/get-ddl-stmt-action.md rename to docs/en/admin-manual/http-actions/fe/get-ddl-stmt-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/get-load-info-action.md b/docs/en/admin-manual/http-actions/fe/get-load-info-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/get-load-info-action.md rename to docs/en/admin-manual/http-actions/fe/get-load-info-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/get-load-state.md b/docs/en/admin-manual/http-actions/fe/get-load-state.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/get-load-state.md rename to docs/en/admin-manual/http-actions/fe/get-load-state.md diff --git a/docs/en/administrator-guide/http-actions/fe/get-log-file-action.md b/docs/en/admin-manual/http-actions/fe/get-log-file-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/get-log-file-action.md rename to docs/en/admin-manual/http-actions/fe/get-log-file-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/get-small-file.md b/docs/en/admin-manual/http-actions/fe/get-small-file.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/get-small-file.md rename to docs/en/admin-manual/http-actions/fe/get-small-file.md diff --git a/docs/en/administrator-guide/http-actions/fe/ha-action.md b/docs/en/admin-manual/http-actions/fe/ha-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/ha-action.md rename to docs/en/admin-manual/http-actions/fe/ha-action.md diff --git 
a/docs/en/administrator-guide/http-actions/fe/hardware-info-action.md b/docs/en/admin-manual/http-actions/fe/hardware-info-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/hardware-info-action.md rename to docs/en/admin-manual/http-actions/fe/hardware-info-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/health-action.md b/docs/en/admin-manual/http-actions/fe/health-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/health-action.md rename to docs/en/admin-manual/http-actions/fe/health-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/log-action.md b/docs/en/admin-manual/http-actions/fe/log-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/log-action.md rename to docs/en/admin-manual/http-actions/fe/log-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/logout-action.md b/docs/en/admin-manual/http-actions/fe/logout-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/logout-action.md rename to docs/en/admin-manual/http-actions/fe/logout-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/manager/cluster-action.md b/docs/en/admin-manual/http-actions/fe/manager/cluster-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/manager/cluster-action.md rename to docs/en/admin-manual/http-actions/fe/manager/cluster-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/manager/node-action.md b/docs/en/admin-manual/http-actions/fe/manager/node-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/manager/node-action.md rename to docs/en/admin-manual/http-actions/fe/manager/node-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/manager/query-profile-action.md b/docs/en/admin-manual/http-actions/fe/manager/query-profile-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/manager/query-profile-action.md rename to docs/en/admin-manual/http-actions/fe/manager/query-profile-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/meta-action.md b/docs/en/admin-manual/http-actions/fe/meta-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/meta-action.md rename to docs/en/admin-manual/http-actions/fe/meta-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/meta-info-action.md b/docs/en/admin-manual/http-actions/fe/meta-info-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/meta-info-action.md rename to docs/en/admin-manual/http-actions/fe/meta-info-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/meta-replay-state-action.md b/docs/en/admin-manual/http-actions/fe/meta-replay-state-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/meta-replay-state-action.md rename to docs/en/admin-manual/http-actions/fe/meta-replay-state-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/profile-action.md b/docs/en/admin-manual/http-actions/fe/profile-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/profile-action.md rename to docs/en/admin-manual/http-actions/fe/profile-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/query-detail-action.md b/docs/en/admin-manual/http-actions/fe/query-detail-action.md similarity index 100% rename from 
docs/en/administrator-guide/http-actions/fe/query-detail-action.md rename to docs/en/admin-manual/http-actions/fe/query-detail-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/query-profile-action.md b/docs/en/admin-manual/http-actions/fe/query-profile-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/query-profile-action.md rename to docs/en/admin-manual/http-actions/fe/query-profile-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/row-count-action.md b/docs/en/admin-manual/http-actions/fe/row-count-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/row-count-action.md rename to docs/en/admin-manual/http-actions/fe/row-count-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/session-action.md b/docs/en/admin-manual/http-actions/fe/session-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/session-action.md rename to docs/en/admin-manual/http-actions/fe/session-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/set-config-action.md b/docs/en/admin-manual/http-actions/fe/set-config-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/set-config-action.md rename to docs/en/admin-manual/http-actions/fe/set-config-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/show-data-action.md b/docs/en/admin-manual/http-actions/fe/show-data-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/show-data-action.md rename to docs/en/admin-manual/http-actions/fe/show-data-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/show-meta-info-action.md b/docs/en/admin-manual/http-actions/fe/show-meta-info-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/show-meta-info-action.md rename to docs/en/admin-manual/http-actions/fe/show-meta-info-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/show-proc-action.md b/docs/en/admin-manual/http-actions/fe/show-proc-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/show-proc-action.md rename to docs/en/admin-manual/http-actions/fe/show-proc-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/show-runtime-info-action.md b/docs/en/admin-manual/http-actions/fe/show-runtime-info-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/show-runtime-info-action.md rename to docs/en/admin-manual/http-actions/fe/show-runtime-info-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/statement-execution-action.md b/docs/en/admin-manual/http-actions/fe/statement-execution-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/statement-execution-action.md rename to docs/en/admin-manual/http-actions/fe/statement-execution-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/system-action.md b/docs/en/admin-manual/http-actions/fe/system-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/system-action.md rename to docs/en/admin-manual/http-actions/fe/system-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/table-query-plan-action.md b/docs/en/admin-manual/http-actions/fe/table-query-plan-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/table-query-plan-action.md rename to 
docs/en/admin-manual/http-actions/fe/table-query-plan-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/table-row-count-action.md b/docs/en/admin-manual/http-actions/fe/table-row-count-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/table-row-count-action.md rename to docs/en/admin-manual/http-actions/fe/table-row-count-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/table-schema-action.md b/docs/en/admin-manual/http-actions/fe/table-schema-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/table-schema-action.md rename to docs/en/admin-manual/http-actions/fe/table-schema-action.md diff --git a/docs/en/administrator-guide/http-actions/fe/upload-action.md b/docs/en/admin-manual/http-actions/fe/upload-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/fe/upload-action.md rename to docs/en/admin-manual/http-actions/fe/upload-action.md diff --git a/docs/en/administrator-guide/http-actions/get-load-state.md b/docs/en/admin-manual/http-actions/get-load-state.md similarity index 100% rename from docs/en/administrator-guide/http-actions/get-load-state.md rename to docs/en/admin-manual/http-actions/get-load-state.md diff --git a/docs/en/administrator-guide/http-actions/get-tablets.md b/docs/en/admin-manual/http-actions/get-tablets.md similarity index 100% rename from docs/en/administrator-guide/http-actions/get-tablets.md rename to docs/en/admin-manual/http-actions/get-tablets.md diff --git a/docs/en/administrator-guide/http-actions/profile-action.md b/docs/en/admin-manual/http-actions/profile-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/profile-action.md rename to docs/en/admin-manual/http-actions/profile-action.md diff --git a/docs/en/administrator-guide/http-actions/query-detail-action.md b/docs/en/admin-manual/http-actions/query-detail-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/query-detail-action.md rename to docs/en/admin-manual/http-actions/query-detail-action.md diff --git a/docs/en/administrator-guide/http-actions/restore-tablet.md b/docs/en/admin-manual/http-actions/restore-tablet.md similarity index 100% rename from docs/en/administrator-guide/http-actions/restore-tablet.md rename to docs/en/admin-manual/http-actions/restore-tablet.md diff --git a/docs/en/administrator-guide/http-actions/show-data-action.md b/docs/en/admin-manual/http-actions/show-data-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/show-data-action.md rename to docs/en/admin-manual/http-actions/show-data-action.md diff --git a/docs/en/administrator-guide/http-actions/tablet-migration-action.md b/docs/en/admin-manual/http-actions/tablet-migration-action.md similarity index 100% rename from docs/en/administrator-guide/http-actions/tablet-migration-action.md rename to docs/en/admin-manual/http-actions/tablet-migration-action.md diff --git a/docs/en/administrator-guide/http-actions/tablets_distribution.md b/docs/en/admin-manual/http-actions/tablets_distribution.md similarity index 100% rename from docs/en/administrator-guide/http-actions/tablets_distribution.md rename to docs/en/admin-manual/http-actions/tablets_distribution.md diff --git a/new-docs/en/admin-manual/maint-monitor/be-olap-error-code.md b/docs/en/admin-manual/maint-monitor/be-olap-error-code.md similarity index 100% rename from new-docs/en/admin-manual/maint-monitor/be-olap-error-code.md rename to 
docs/en/admin-manual/maint-monitor/be-olap-error-code.md diff --git a/new-docs/en/admin-manual/maint-monitor/disk-capacity.md b/docs/en/admin-manual/maint-monitor/disk-capacity.md similarity index 100% rename from new-docs/en/admin-manual/maint-monitor/disk-capacity.md rename to docs/en/admin-manual/maint-monitor/disk-capacity.md diff --git a/docs/en/administrator-guide/operation/doris-error-code.md b/docs/en/admin-manual/maint-monitor/doris-error-code.md similarity index 100% rename from docs/en/administrator-guide/operation/doris-error-code.md rename to docs/en/admin-manual/maint-monitor/doris-error-code.md diff --git a/new-docs/en/admin-manual/maint-monitor/metadata-operation.md b/docs/en/admin-manual/maint-monitor/metadata-operation.md similarity index 100% rename from new-docs/en/admin-manual/maint-monitor/metadata-operation.md rename to docs/en/admin-manual/maint-monitor/metadata-operation.md diff --git a/docs/en/administrator-guide/operation/monitor-alert.md b/docs/en/admin-manual/maint-monitor/monitor-alert.md similarity index 100% rename from docs/en/administrator-guide/operation/monitor-alert.md rename to docs/en/admin-manual/maint-monitor/monitor-alert.md diff --git a/docs/en/administrator-guide/operation/monitor-metrics/be-metrics.md b/docs/en/admin-manual/maint-monitor/monitor-metrics/be-metrics.md similarity index 100% rename from docs/en/administrator-guide/operation/monitor-metrics/be-metrics.md rename to docs/en/admin-manual/maint-monitor/monitor-metrics/be-metrics.md diff --git a/docs/en/administrator-guide/operation/monitor-metrics/fe-metrics.md b/docs/en/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md similarity index 100% rename from docs/en/administrator-guide/operation/monitor-metrics/fe-metrics.md rename to docs/en/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md diff --git a/docs/en/administrator-guide/operation/multi-tenant.md b/docs/en/admin-manual/maint-monitor/multi-tenant.md similarity index 100% rename from docs/en/administrator-guide/operation/multi-tenant.md rename to docs/en/admin-manual/maint-monitor/multi-tenant.md diff --git a/new-docs/en/admin-manual/maint-monitor/tablet-meta-tool.md b/docs/en/admin-manual/maint-monitor/tablet-meta-tool.md similarity index 100% rename from new-docs/en/admin-manual/maint-monitor/tablet-meta-tool.md rename to docs/en/admin-manual/maint-monitor/tablet-meta-tool.md diff --git a/new-docs/en/admin-manual/maint-monitor/tablet-repair-and-balance.md b/docs/en/admin-manual/maint-monitor/tablet-repair-and-balance.md similarity index 100% rename from new-docs/en/admin-manual/maint-monitor/tablet-repair-and-balance.md rename to docs/en/admin-manual/maint-monitor/tablet-repair-and-balance.md diff --git a/docs/en/administrator-guide/operation/tablet-restore-tool.md b/docs/en/admin-manual/maint-monitor/tablet-restore-tool.md similarity index 100% rename from docs/en/administrator-guide/operation/tablet-restore-tool.md rename to docs/en/admin-manual/maint-monitor/tablet-restore-tool.md diff --git a/new-docs/en/admin-manual/multi-tenant.md b/docs/en/admin-manual/multi-tenant.md similarity index 100% rename from new-docs/en/admin-manual/multi-tenant.md rename to docs/en/admin-manual/multi-tenant.md diff --git a/new-docs/en/admin-manual/optimization.md b/docs/en/admin-manual/optimization.md similarity index 100% rename from new-docs/en/admin-manual/optimization.md rename to docs/en/admin-manual/optimization.md diff --git a/new-docs/en/admin-manual/privilege-ldap/ldap.md b/docs/en/admin-manual/privilege-ldap/ldap.md 
similarity index 100% rename from new-docs/en/admin-manual/privilege-ldap/ldap.md rename to docs/en/admin-manual/privilege-ldap/ldap.md diff --git a/new-docs/en/admin-manual/privilege-ldap/user-privilege.md b/docs/en/admin-manual/privilege-ldap/user-privilege.md similarity index 100% rename from new-docs/en/admin-manual/privilege-ldap/user-privilege.md rename to docs/en/admin-manual/privilege-ldap/user-privilege.md diff --git a/new-docs/en/admin-manual/query-profile.md b/docs/en/admin-manual/query-profile.md similarity index 100% rename from new-docs/en/admin-manual/query-profile.md rename to docs/en/admin-manual/query-profile.md diff --git a/new-docs/en/admin-manual/sql-interception.md b/docs/en/admin-manual/sql-interception.md similarity index 100% rename from new-docs/en/admin-manual/sql-interception.md rename to docs/en/admin-manual/sql-interception.md diff --git a/docs/en/administrator-guide/alter-table/alter-table-bitmap-index.md b/docs/en/administrator-guide/alter-table/alter-table-bitmap-index.md deleted file mode 100644 index 34506e5e33..0000000000 --- a/docs/en/administrator-guide/alter-table/alter-table-bitmap-index.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -{ - "title": "Bitmap Index", - "language": "en" -} ---- - - - -# Bitmap Index -Users can speed up queries by creating a bitmap index -This document focuses on how to create an index job, as well as some considerations and frequently asked questions when creating an index. - -## Glossary -* bitmap index: a fast data structure that speeds up queries - -## Basic Principles -Creating and dropping index is essentially a schema change job. For details, please refer to -[Schema Change](alter-table-schema-change.html). - -## Syntax -There are two forms of index creation and modification related syntax, one is integrated with alter table statement, and the other is using separate -create/drop index syntax -1. Create Index - - Please refer to [CREATE INDEX](../../sql-reference/sql-statements/Data%20Definition/CREATE%20INDEX.html) - or [ALTER TABLE](../../sql-reference/sql-statements/Data%20Definition/ALTER%20TABLE.html), - You can also specify a bitmap index when creating a table, Please refer to [CREATE TABLE](../../sql-reference/sql-statements/Data%20Definition/CREATE%20TABLE.html) - -2. Show Index - - Please refer to [SHOW INDEX](../../sql-reference/sql-statements/Administration/SHOW%20INDEX.html) - -3. Drop Index - - Please refer to [DROP INDEX](../../sql-reference/sql-statements/Data%20Definition/DROP%20INDEX.html) or [ALTER TABLE](../../sql-reference/sql-statements/Data%20Definition/ALTER%20TABLE.html) - -## Create Job -Please refer to [Schema Change](alter-table-schema-change.html) -## View Job -Please refer to [Schema Change](alter-table-schema-change.html) - -## Cancel Job -Please refer to [Schema Change](alter-table-schema-change.html) - -## Notice -* Currently only index of bitmap type is supported. -* The bitmap index is only created on a single column. -* Bitmap indexes can be applied to all columns of the `Duplicate` data model and key columns of the `Aggregate` and `Uniq` models. -* The data types supported by bitmap indexes are as follows: - * `TINYINT` - * `SMALLINT` - * `INT` - * `UNSIGNEDINT` - * `BIGINT` - * `CHAR` - * `VARCHAR` - * `DATE` - * `DATETIME` - * `LARGEINT` - * `DECIMAL` - * `BOOL` -* The bitmap index takes effect only in segmentV2. The table's storage format will be converted to V2 automatically when creating index. 
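As a quick orientation to the statements referenced above, here is a minimal sketch of creating, inspecting, and dropping a bitmap index, assuming a hypothetical table `example_db.example_tbl` with a low-cardinality `city` column. Treat it as illustrative only; the linked CREATE INDEX, SHOW INDEX, and DROP INDEX pages remain the authoritative syntax reference.

```sql
-- Build a bitmap index on the city column (runs as an asynchronous schema change job)
CREATE INDEX idx_city ON example_db.example_tbl (city) USING BITMAP COMMENT 'bitmap index on city';

-- List the indexes currently defined on the table
SHOW INDEX FROM example_db.example_tbl;

-- Remove the index when it is no longer useful
DROP INDEX idx_city ON example_db.example_tbl;
```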
diff --git a/docs/en/administrator-guide/alter-table/alter-table-replace-table.md b/docs/en/administrator-guide/alter-table/alter-table-replace-table.md deleted file mode 100644 index 02532988d8..0000000000 --- a/docs/en/administrator-guide/alter-table/alter-table-replace-table.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -{ - "title": "Replace Table", - "language": "en" -} ---- - - - -# Replace Table - -In version 0.14, Doris supports atomic replacement of two tables. -This operation only applies to OLAP tables. - -For partition level replacement operations, please refer to [Temporary Partition Document](./alter-table-temp-partition.md) - -## Syntax - -``` -ALTER TABLE [db.]tbl1 REPLACE WITH tbl2 -[PROPERTIES('swap' = 'true')]; -``` - -Replace table `tbl1` with table `tbl2`. - -If the `swap` parameter is `true`, after replacement, the data in the table named `tbl1` is the data in the original `tbl2` table. The data in the table named `tbl2` is the data in the original table `tbl1`. That is, the data of the two tables are interchanged. - -If the `swap` parameter is `false`, after replacement, the data in the table named `tbl1` is the data in the original `tbl2` table. The table named `tbl2` is dropped. - -## Principle - -The replacement table function actually turns the following set of operations into an atomic operation. - -Suppose you want to replace table A with table B, and `swap` is `true`, the operation is as follows: - -1. Rename table B to table A. -2. Rename table A to table B. - -If `swap` is `false`, the operation is as follows: - -1. Drop table A. -2. Rename table B to table A. - -## Notice - -1. The `swap` parameter defaults to `true`. That is, the replacement table operation is equivalent to the exchange of two table data. -2. If the `swap` parameter is set to `false`, the replaced table (table A) will be dropped and cannot be recovered. -3. The replacement operation can only occur between two OLAP tables, and the table structure of the two tables is not checked for consistency. -4. The replacement operation will not change the original permission settings. Because the permission check is based on the table name. - -## Best Practices - -1. Atomic Overwrite Operation - - In some cases, the user wants to be able to rewrite the data of a certain table, but if it is dropped and then imported, there will be a period of time in which the data cannot be viewed. At this time, the user can first use the `CREATE TABLE LIKE` statement to create a new table with the same structure, import the new data into the new table, and replace the old table atomically through the replacement operation to achieve the goal. For partition level atomic overwrite operation, please refer to [Temporary partition document](./alter-table-temp-partition.md) diff --git a/docs/en/administrator-guide/alter-table/alter-table-rollup.md b/docs/en/administrator-guide/alter-table/alter-table-rollup.md deleted file mode 100644 index a3df4c8dc2..0000000000 --- a/docs/en/administrator-guide/alter-table/alter-table-rollup.md +++ /dev/null @@ -1,188 +0,0 @@ ---- -{ - "title": "Rollup", - "language": "en" -} ---- - - - -# Rollup - -Users can speed up queries by creating rollup tables. For the concept and usage of Rollup, please refer to [Data - Model, ROLLUP and Prefix Index](../../getting-started/data-model-rollup_EN.md) and - [Rollup and query](../../getting-started/hit-the-rollup_EN.md). 
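Before the job-management details below, a minimal sketch of submitting and monitoring a rollup, assuming a hypothetical base table `tbl1(k1, k2, v1)`; the authoritative syntax is in `HELP ALTER TABLE`.

```sql
-- Submit an asynchronous job that builds a rollup named r1 from columns of the base table
ALTER TABLE tbl1 ADD ROLLUP r1 (k2, k1, v1);

-- Monitor progress; State moves through PENDING, WAITING_TXN, RUNNING and finally FINISHED
SHOW ALTER TABLE ROLLUP;
```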
- -This document focuses on how to create a Rollup job, as well as some considerations and frequently asked questions about creating a Rollup. - -## Glossary - -* Base Table: When each table is created, it corresponds to a base table. The base table stores the complete data of this table. Rollups are usually created based on the data in the base table (and can also be created from other rollups). -* Index: Materialized index. Rollup or Base Table are both called materialized indexes. -* Transaction: Each import task is a transaction, and each transaction has a unique incrementing Transaction ID. - -## Basic Principles - -The basic process of creating a Rollup is to generate a new Rollup data containing the specified column from the data in the Base table. Among them, two parts of data conversion are needed. One is the conversion of existing historical data, and the other is the conversion of newly arrived imported data during Rollup execution. - -``` -+----------+ -| Load Job | -+----+-----+ - | - | Load job generates both base and rollup index data - | - | +------------------+ +---------------+ - | | Base Index | | Base Index | - +------> New Incoming Data| | History Data | - | +------------------+ +------+--------+ - | | - | | Convert history data - | | - | +------------------+ +------v--------+ - | | Rollup Index | | Rollup Index | - +------> New Incoming Data| | History Data | - +------------------+ +---------------+ -``` - -Before starting the conversion of historical data, Doris will obtain a latest transaction ID. And wait for all import transactions before this Transaction ID to complete. This Transaction ID becomes a watershed. This means that Doris guarantees that all import tasks after the watershed will generate data for the Rollup Index at the same time. In this way, after the historical data conversion is completed, the data of the Rollup and Base tables can be guaranteed to be flush. - -## Create Job - -The specific syntax for creating a Rollup can be found in the description of the Rollup section in the help `HELP ALTER TABLE`. - -The creation of Rollup is an asynchronous process. After the job is submitted successfully, the user needs to use the `SHOW ALTER TABLE ROLLUP` command to view the progress of the job. - -## View Job - -`SHOW ALTER TABLE ROLLUP` You can view rollup jobs that are currently executing or completed. For example: - -``` - JobId: 20037 - TableName: tbl1 - CreateTime: 2019-08-06 15:38:49 - FinishedTime: N/A - BaseIndexName: tbl1 -RollupIndexName: r1 - RollupId: 20038 - TransactionId: 10034 - State: PENDING - Msg: - Progress: N/A - Timeout: 86400 -``` - -* JobId: A unique ID for each Rollup job. -* TableName: The table name of the base table corresponding to Rollup. -* CreateTime: Job creation time. -* FinishedTime: The end time of the job. If it is not finished, "N / A" is displayed. -* BaseIndexName: The name of the source Index corresponding to Rollup. -* RollupIndexName: The name of the Rollup. -* RollupId: The unique ID of the Rollup. -* TransactionId: the watershed transaction ID of the conversion history data. -* State: The phase of the operation. -     * PENDING: The job is waiting in the queue to be scheduled. -     * WAITING_TXN: Wait for the import task before the watershed transaction ID to complete. -     * RUNNING: Historical data conversion. -     * FINISHED: The operation was successful. -     * CANCELLED: The job failed. -* Msg: If the job fails, a failure message is displayed here. -* Progress: operation progress. 
Progress is displayed only in the RUNNING state. Progress is displayed in M / N. Where N is the total number of copies of Rollup. M is the number of copies of historical data conversion completed. -* Timeout: Job timeout time. Unit of second. - -## Cancel Job - -In the case that the job status is not FINISHED or CANCELLED, you can cancel the Rollup job with the following command: - -`CANCEL ALTER TABLE ROLLUP FROM tbl_name;` - -## Notice - -* A table can have only one Rollup job running at a time. And only one rollup can be created in a job. - -* Rollup operations do not block import and query operations. - -* If a DELETE operation has a Key column in a where condition that does not exist in a Rollup, the DELETE is not allowed. - -    If a Key column does not exist in a Rollup, the DELETE operation cannot delete data from the Rollup, so the data consistency between the Rollup table and the Base table cannot be guaranteed. - -* Rollup columns must exist in the Base table. - -    Rollup columns are always a subset of the Base table columns. Columns that do not exist in the Base table cannot appear. - -* If a rollup contains columns of the REPLACE aggregation type, the rollup must contain all the key columns. - -    Assume the structure of the Base table is as follows: -     -    `` `(k1 INT, k2 INT, v1 INT REPLACE, v2 INT SUM)` `` -     -    If you need to create a Rollup that contains `v1` columns, you must include the` k1`, `k2` columns. Otherwise, the system cannot determine the value of `v1` listed in Rollup. -     -    Note that all Value columns in the Unique data model table are of the REPLACE aggregation type. -     -* Rollup of the DUPLICATE data model table, you can specify the DUPLICATE KEY of the rollup. - -    The DUPLICATE KEY in the DUPLICATE data model table is actually sorted. Rollup can specify its own sort order, but the sort order must be a prefix of the Rollup column order. If not specified, the system will check if the Rollup contains all sort columns of the Base table, and if it does not, it will report an error. For example: -     -    Base table structure: `(k1 INT, k2 INT, k3 INT) DUPLICATE KEY (k1, k2)` -     -    Rollup can be: `(k2 INT, k1 INT) DUPLICATE KEY (k2)` - -* Rollup does not need to include partitioned or bucket columns for the Base table. - -## FAQ - -* How many rollups can a table create - -    There is theoretically no limit to the number of rollups a table can create, but too many rollups can affect import performance. Because when importing, data will be generated for all rollups at the same time. At the same time, Rollup will take up physical storage space. Usually the number of rollups for a table is less than 10. -     -* Rollup creation speed - -    Rollup creation speed is currently estimated at about 10MB / s based on the worst efficiency. To be conservative, users can set the timeout for jobs based on this rate. - -* Submitting job error `Table xxx is not stable. ...` - -    Rollup can start only when the table data is complete and unbalanced. If some data shard copies of the table are incomplete, or if some copies are undergoing an equalization operation, the submission is rejected. -     -    Whether the data shard copy is complete can be checked with the following command: -     -    ```ADMIN SHOW REPLICA STATUS FROM tbl WHERE STATUS! =" OK ";``` -     -    If a result is returned, there is a problem with the copy. These problems are usually fixed automatically by the system. 
You can also use the following commands to repair this table first: -     -    ```ADMIN REPAIR TABLE tbl1; ``` -     -    You can check if there are running balancing tasks with the following command: -     -    ```SHOW PROC" / cluster_balance / pending_tablets ";``` -     -    You can wait for the balancing task to complete, or temporarily disable the balancing operation with the following command: - - ```ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");``` - -## Configurations - -### FE Configurations - -* `alter_table_timeout_second`: The default timeout for the job is 86400 seconds. - -### BE Configurations - -* `alter_tablet_worker_count`: Number of threads used to perform historical data conversion on the BE side. The default is 3. If you want to speed up the rollup job, you can increase this parameter appropriately and restart the BE. But too many conversion threads can cause increased IO pressure and affect other operations. This thread is shared with the Schema Change job. diff --git a/docs/en/administrator-guide/alter-table/alter-table-schema-change.md b/docs/en/administrator-guide/alter-table/alter-table-schema-change.md deleted file mode 100644 index 31755b02a0..0000000000 --- a/docs/en/administrator-guide/alter-table/alter-table-schema-change.md +++ /dev/null @@ -1,231 +0,0 @@ ---- -{ - "title": "Schema Change", - "language": "en" -} ---- - - - -# Schema Change - -Users can modify the schema of existing tables through the Schema Change operation. Doris currently supports the following modifications: - -* Add and delete columns -* Modify column type -* Adjust column order -* Add and modify Bloom Filter -* Add and delete bitmap index - -This document mainly describes how to create a Schema Change job, as well as some considerations and frequently asked questions about Schema Change. -## Glossary - -* Base Table: When each table is created, it corresponds to a base table. The base table stores the complete data of this table. Rollups are usually created based on the data in the base table (and can also be created from other rollups). -* Index: Materialized index. Rollup or Base Table are both called materialized indexes. -* Transaction: Each import task is a transaction, and each transaction has a unique incrementing Transaction ID. -* Rollup: Roll-up tables based on base tables or other rollups. - -## Basic Principles - -The basic process of executing a Schema Change is to generate a copy of the index data of the new schema from the data of the original index. Among them, two parts of data conversion are required. One is the conversion of existing historical data, and the other is the conversion of newly arrived imported data during the execution of Schema Change. -``` -+----------+ -| Load Job | -+----+-----+ - | - | Load job generates both origin and new index data - | - | +------------------+ +---------------+ - | | Origin Index | | Origin Index | - +------> New Incoming Data| | History Data | - | +------------------+ +------+--------+ - | | - | | Convert history data - | | - | +------------------+ +------v--------+ - | | New Index | | New Index | - +------> New Incoming Data| | History Data | - +------------------+ +---------------+ -``` - -Before starting the conversion of historical data, Doris will obtain a latest transaction ID. And wait for all import transactions before this Transaction ID to complete. This Transaction ID becomes a watershed. 
This means that Doris guarantees that all import tasks after the watershed will generate data for both the original Index and the new Index. In this way, when the historical data conversion is completed, the data in the new Index can be guaranteed to be complete. -## Create Job - -The specific syntax for creating a Schema Change can be found in the description of the Schema Change section in the help `HELP ALTER TABLE`. - -The creation of Schema Change is an asynchronous process. After the job is submitted successfully, the user needs to view the job progress through the `SHOW ALTER TABLE COLUMN` command. -## View Job - -`SHOW ALTER TABLE COLUMN` You can view the Schema Change jobs that are currently executing or completed. When multiple indexes are involved in a Schema Change job, the command displays multiple lines, each corresponding to an index. For example: - -``` - JobId: 20021 - TableName: tbl1 - CreateTime: 2019-08-05 23:03:13 - FinishTime: 2019-08-05 23:03:42 - IndexName: tbl1 - IndexId: 20022 -OriginIndexId: 20017 -SchemaVersion: 2:792557838 -TransactionId: 10023 - State: FINISHED - Msg: - Progress: N/A - Timeout: 86400 -``` - -* JobId: A unique ID for each Schema Change job. -* TableName: The table name of the base table corresponding to Schema Change. -* CreateTime: Job creation time. -* FinishedTime: The end time of the job. If it is not finished, "N / A" is displayed. -* IndexName: The name of an Index involved in this modification. -* IndexId: The unique ID of the new Index. -* OriginIndexId: The unique ID of the old Index. -* SchemaVersion: Displayed in M: N format. M is the version of this Schema Change, and N is the corresponding hash value. With each Schema Change, the version is incremented. -* TransactionId: the watershed transaction ID of the conversion history data. -* State: The phase of the operation. - * PENDING: The job is waiting in the queue to be scheduled. - * WAITING_TXN: Wait for the import task before the watershed transaction ID to complete. - * RUNNING: Historical data conversion. - * FINISHED: The operation was successful. - * CANCELLED: The job failed. -* Msg: If the job fails, a failure message is displayed here. -* Progress: operation progress. Progress is displayed only in the RUNNING state. Progress is displayed in M ​​/ N. Where N is the total number of copies involved in the Schema Change. M is the number of copies of historical data conversion completed. -* Timeout: Job timeout time. Unit of second. - -## Cancel Job - -In the case that the job status is not FINISHED or CANCELLED, you can cancel the Schema Change job with the following command: -`CANCEL ALTER TABLE COLUMN FROM tbl_name;` - -## Best Practice - -Schema Change can make multiple changes to multiple indexes in one job. 
For example: -Source Schema: - -``` -+-----------+-------+------+------+------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+-----------+-------+------+------+------+---------+-------+ -| tbl1 | k1 | INT | No | true | N/A | | -| | k2 | INT | No | true | N/A | | -| | k3 | INT | No | true | N/A | | -| | | | | | | | -| rollup2 | k2 | INT | No | true | N/A | | -| | | | | | | | -| rollup1 | k1 | INT | No | true | N/A | | -| | k2 | INT | No | true | N/A | | -+-----------+-------+------+------+------+---------+-------+ -``` - -You can add a row k4 to both rollup1 and rollup2 by adding the following k5 to rollup2: -``` -ALTER TABLE tbl1 -ADD COLUMN k4 INT default "1" to rollup1, -ADD COLUMN k4 INT default "1" to rollup2, -ADD COLUMN k5 INT default "1" to rollup2; -``` - -When completion, the Schema becomes: - -``` -+-----------+-------+------+------+------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+-----------+-------+------+------+------+---------+-------+ -| tbl1 | k1 | INT | No | true | N/A | | -| | k2 | INT | No | true | N/A | | -| | k3 | INT | No | true | N/A | | -| | k4 | INT | No | true | 1 | | -| | k5 | INT | No | true | 1 | | -| | | | | | | | -| rollup2 | k2 | INT | No | true | N/A | | -| | k4 | INT | No | true | 1 | | -| | k5 | INT | No | true | 1 | | -| | | | | | | | -| rollup1 | k1 | INT | No | true | N/A | | -| | k2 | INT | No | true | N/A | | -| | k4 | INT | No | true | 1 | | -+-----------+-------+------+------+------+---------+-------+ -``` - -As you can see, the base table tbl1 also automatically added k4, k5 columns. That is, columns added to any rollup are automatically added to the Base table. - -At the same time, columns that already exist in the Base table are not allowed to be added to Rollup. If you need to do this, you can re-create a Rollup with the new columns and then delete the original Rollup. -## Notice - -* Only one Schema Change job can be running on a table at a time. - -* Schema Change operation does not block import and query operations. - -* The partition column and bucket column cannot be modified. - -* If there is a value column aggregated by REPLACE in the schema, the Key column is not allowed to be deleted. - -     If the Key column is deleted, Doris cannot determine the value of the REPLACE column. -     -     All non-Key columns of the Unique data model table are REPLACE aggregated. -     -* When adding a value column whose aggregation type is SUM or REPLACE, the default value of this column has no meaning to historical data. - -     Because the historical data has lost the detailed information, the default value cannot actually reflect the aggregated value. -     -* When modifying the column type, fields other than Type need to be completed according to the information on the original column. - -     If you modify the column `k1 INT SUM NULL DEFAULT" 1 "` as type BIGINT, you need to execute the following command: - -```ALTER TABLE tbl1 MODIFY COLUMN `k1` BIGINT SUM NULL DEFAULT "1";``` - - Note that in addition to the new column types, such as the aggregation mode, Nullable attributes, and default values must be completed according to the original information. - -* Modifying column names, aggregation types, nullable attributes, default values, and column comments is not supported. - -## FAQ - -* the execution speed of Schema Change - - At present, the execution speed of Schema Change is estimated to be about 10MB / s according to the worst efficiency. 
To be conservative, users can set the timeout for jobs based on this rate. - -* Submit job error `Table xxx is not stable. ...` - - Schema Change can only be started when the table data is complete and unbalanced. If some data shard copies of the table are incomplete, or if some copies are undergoing an equalization operation, the submission is rejected. -      - Whether the data shard copy is complete can be checked with the following command: - ```ADMIN SHOW REPLICA STATUS FROM tbl WHERE STATUS != "OK";``` - - If a result is returned, there is a problem with the copy. These problems are usually fixed automatically by the system. You can also use the following commands to repair this table first: - ```ADMIN REPAIR TABLE tbl1;``` - - You can check if there are running balancing tasks with the following command: - - ```SHOW PROC "/cluster_balance/pending_tablets";``` - - You can wait for the balancing task to complete, or temporarily disable the balancing operation with the following command: - - ```ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");``` - -## Configurations - -### FE Configurations - -* `alter_table_timeout_second`: The default timeout for the job is 86400 seconds. - -### BE Configurations - -* `alter_tablet_worker_count`: Number of threads used to perform historical data conversion on the BE side. The default is 3. If you want to speed up the Schema Change job, you can increase this parameter appropriately and restart the BE. But too many conversion threads can cause increased IO pressure and affect other operations. This thread is shared with the Rollup job. diff --git a/docs/en/administrator-guide/alter-table/alter-table-temp-partition.md b/docs/en/administrator-guide/alter-table/alter-table-temp-partition.md deleted file mode 100644 index 94f7440bf7..0000000000 --- a/docs/en/administrator-guide/alter-table/alter-table-temp-partition.md +++ /dev/null @@ -1,288 +0,0 @@ ---- -{ - "title": "Temporary partition", - "language": "en" -} ---- - - - -# Temporary partition - -Since version 0.12, Doris supports temporary partitioning. - -A temporary partition belongs to a partitioned table. Only partitioned tables can create temporary partitions. - -## Rules - -* The partition columns of the temporary partition is the same as the formal partition and cannot be modified. -* The partition ranges of all temporary partitions of a table cannot overlap, but the ranges of temporary partitions and formal partitions can overlap. -* The partition name of the temporary partition cannot be the same as the formal partitions and other temporary partitions. - -## Supported operations - -The temporary partition supports add, delete, and replace operations. 
- -### Add temporary partition - -You can add temporary partitions to a table with the `ALTER TABLE ADD TEMPORARY PARTITION` statement: - -``` -ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES LESS THAN ("2020-02-01"); - -ALTER TABLE tbl2 ADD TEMPORARY PARTITION tp1 VALUES [("2020-01-01"), ("2020-02-01")); - -ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES LESS THAN ("2020-02-01") -("in_memory" = "true", "replication_num" = "1") -DISTRIBUTED BY HASH (k1) BUCKETS 5; - -ALTER TABLE tbl3 ADD TEMPORARY PARTITION tp1 VALUES IN ("Beijing", "Shanghai"); - -ALTER TABLE tbl4 ADD TEMPORARY PARTITION tp1 VALUES IN ((1, "Beijing"), (1, "Shanghai")); - -ALTER TABLE tbl3 ADD TEMPORARY PARTITION tp1 VALUES IN ("Beijing", "Shanghai") -("in_memory" = "true", "replication_num" = "1") -DISTRIBUTED BY HASH(k1) BUCKETS 5; - -``` - -See `HELP ALTER TABLE;` for more help and examples. - -Some instructions for adding operations: - -* Adding a temporary partition is similar to adding a formal partition. The partition range of the temporary partition is independent of the formal partition. -* Temporary partition can independently specify some attributes. Includes information such as the number of buckets, the number of replicas, whether it is a memory table, or the storage medium. - -### Delete temporary partition - -A table's temporary partition can be dropped with the `ALTER TABLE DROP TEMPORARY PARTITION` statement: - -``` -ALTER TABLE tbl1 DROP TEMPORARY PARTITION tp1; -``` - -See `HELP ALTER TABLE;` for more help and examples. - -Some instructions for the delete operation: - -* Deleting the temporary partition will not affect the data of the formal partition. - -### Replace partition - -You can replace formal partitions of a table with temporary partitions with the `ALTER TABLE REPLACE PARTITION` statement. - -``` -ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1); - -ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1, tp2, tp3); - -ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1, tp2) -PROPERTIES ( -    "strict_range" = "false", -    "use_temp_partition_name" = "true" -); -``` - -See `HELP ALTER TABLE;` for more help and examples. - -The replace operation has two special optional parameters: - -1. `strict_range` - - The default is true. - - For Range partition, When this parameter is true, the range union of all formal partitions to be replaced needs to be the same as the range union of the temporary partitions to be replaced. When set to false, you only need to ensure that the range between the new formal partitions does not overlap after replacement. - - For List partition, this parameter is always true, and the enumeration values of all full partitions to be replaced must be identical to the enumeration values of the temporary partitions to be replaced. - - Here are some examples: - - * Example 1 - - Range of partitions p1, p2, p3 to be replaced (=> union): - - ``` - (10, 20), [20, 30), [40, 50) => [10, 30), [40, 50) - ``` - - Replace the range of partitions tp1, tp2 (=> union): - - ``` - (10, 30), [40, 45), [45, 50) => [10, 30), [40, 50) - ``` - - The union of ranges is the same, so you can use tp1 and tp2 to replace p1, p2, p3. - - * Example 2 - - Range of partition p1 to be replaced (=> union): - - ``` - (10, 50) => [10, 50) - ``` - - Replace the range of partitions tp1, tp2 (=> union): - - ``` - (10, 30), [40, 50) => [10, 30), [40, 50) - ``` - - The union of ranges is not the same. 
If `strict_range` is true, you cannot use tp1 and tp2 to replace p1. If false, and the two partition ranges `[10, 30), [40, 50)` and the other formal partitions do not overlap, they can be replaced. - - * Example 3 - - Enumerated values of partitions p1, p2 to be replaced (=> union). - - ``` - (1, 2, 3), (4, 5, 6) => (1, 2, 3, 4, 5, 6) - ``` - - Replace the enumerated values of partitions tp1, tp2, tp3 (=> union). - - ``` - (1, 2, 3), (4), (5, 6) => (1, 2, 3, 4, 5, 6) - ``` - - The enumeration values are the same, you can use tp1, tp2, tp3 to replace p1, p2 - - * Example 4 - - Enumerated values of partitions p1, p2, p3 to be replaced (=> union). - - ``` - (("1", "beijing"), ("1", "shanghai")), (("2", "beijing"), ("2", "shanghai")), (("3", "beijing"), ("3", "shanghai")) => (("1", "beijing"), ("3", "shanghai")) "), ("1", "shanghai"), ("2", "beijing"), ("2", "shanghai"), ("3", "beijing"), ("3", "shanghai")) - ``` - - Replace the enumerated values of partitions tp1, tp2 (=> union). - - ``` - (("1", "beijing"), ("1", "shanghai")), (("2", "beijing"), ("2", "shanghai"), ("3", "beijing"), ("3", "shanghai")) => (("1", "beijing") , ("1", "shanghai"), ("2", "beijing"), ("2", "shanghai"), ("3", "beijing"), ("3", "shanghai")) - ``` - - The enumeration values are the same, you can use tp1, tp2 to replace p1, p2, p3 - -2. `use_temp_partition_name` - - The default is false. When this parameter is false, and the number of partitions to be replaced is the same as the number of replacement partitions, the name of the formal partition after the replacement remains unchanged. If true, after replacement, the name of the formal partition is the name of the replacement partition. Here are some examples: - - * Example 1 - - ``` - ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1); - ``` - - `use_temp_partition_name` is false by default. After replacement, the partition name is still p1, but the related data and attributes are replaced with tp1. - - If `use_temp_partition_name` is true by default, the name of the partition is tp1 after replacement. The p1 partition no longer exists. - - * Example 2 - - ``` - ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1); - ``` - - `use_temp_partition_name` is false by default, but this parameter is invalid because the number of partitions to be replaced and the number of replacement partitions are different. After the replacement, the partition name is tp1, and p1 and p2 no longer exist. - -Some instructions for the replacement operation: - -* After the partition is replaced successfully, the replaced partition will be deleted and cannot be recovered. - -## Load and query of temporary partitions - -Users can load data into temporary partitions or specify temporary partitions for querying. - -1. Load temporary partition - - The syntax for specifying a temporary partition is slightly different depending on the load method. Here is a simple illustration through an example: - - ``` - INSERT INTO tbl TEMPORARY PARTITION (tp1, tp2, ...) SELECT .... - ``` - - ``` - curl --location-trusted -u root: -H "label: 123" -H "temporary_partition: tp1, tp2, ..." -T testData http: // host: port / api / testDb / testTbl / _stream_load - ``` - - ``` - LOAD LABEL example_db.label1 - ( - DATA INFILE ("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - TEMPORARY PARTITION (tp1, tp2, ...) - ... 
- ) - WITH BROKER hdfs ("username" = "hdfs_user", "password" = "hdfs_password"); - ``` - - ``` - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS (k1, k2, k3, v1, v2, v3 = k1 * 100), - TEMPORARY PARTITIONS (tp1, tp2, ...), - WHERE k1> 100 - PROPERTIES - (...) - FROM KAFKA - (...); - ``` - -2. Query the temporary partition - - ``` - SELECT ... FROM - tbl1 TEMPORARY PARTITION (tp1, tp2, ...) - JOIN - tbl2 TEMPORARY PARTITION (tp1, tp2, ...) - ON ... - WHERE ...; - ``` - -## Relationship to other operations - -### DROP - -* After using the `DROP` operation to directly drop the database or table, you can recover the database or table (within a limited time) through the `RECOVER` command, but the temporary partition will not be recovered. -* After the formal partition is dropped using the `ALTER` command, the partition can be recovered by the `RECOVER` command (within a limited time). Operating a formal partition is not related to a temporary partition. -* After the temporary partition is dropped using the `ALTER` command, the temporary partition cannot be recovered through the `RECOVER` command. - -### TRUNCATE - -* Use the `TRUNCATE` command to empty the table. The temporary partition of the table will be deleted and cannot be recovered. -* When using `TRUNCATE` command to empty the formal partition, it will not affect the temporary partition. -* You cannot use the `TRUNCATE` command to empty the temporary partition. - -### ALTER - -* When the table has a temporary partition, you cannot use the `ALTER` command to perform Schema Change, Rollup, etc. on the table. -* You cannot add temporary partitions to a table while the table is undergoing a alter operation. - - -## Best Practices - -1. Atomic overwrite - - In some cases, the user wants to be able to rewrite the data of a certain partition, but if it is dropped first and then loaded, there will be a period of time when the data cannot be seen. At this moment, the user can first create a corresponding temporary partition, load new data into the temporary partition, and then replace the original partition atomically through the `REPLACE` operation to achieve the purpose. For atomic overwrite operations of non-partitioned tables, please refer to [Replace Table Document](./alter-table-replace-table.md) -     -2. Modify the number of buckets - - In some cases, the user used an inappropriate number of buckets when creating a partition. The user can first create a temporary partition corresponding to the partition range and specify a new number of buckets. Then use the `INSERT INTO` command to load the data of the formal partition into the temporary partition. Through the replacement operation, the original partition is replaced atomically to achieve the purpose. -     -3. Merge or split partitions - - In some cases, users want to modify the range of partitions, such as merging two partitions, or splitting a large partition into multiple smaller partitions. Then the user can first create temporary partitions corresponding to the merged or divided range, and then load the data of the formal partition into the temporary partition through the `INSERT INTO` command. Through the replacement operation, the original partition is replaced atomically to achieve the purpose. 
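A minimal sketch of the atomic-overwrite practice in item 1, assuming a range-partitioned table `tbl1` whose formal partition `p1` covers `[2020-01-01, 2020-02-01)` and a hypothetical staging table `staging_tbl` holding the rewritten data; every statement form used here is one shown earlier in this document.

```sql
-- 1. Create a temporary partition covering the same range as the formal partition p1
ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES [("2020-01-01"), ("2020-02-01"));

-- 2. Load the new data into the temporary partition only (staging_tbl is hypothetical)
INSERT INTO tbl1 TEMPORARY PARTITION (tp1) SELECT * FROM staging_tbl;

-- 3. Swap it in atomically; with the default use_temp_partition_name = false,
--    the partition keeps the name p1 but now holds tp1's data and attributes
ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1);
```

Queries against `p1` see either the old data or the new data, never an intermediate state, which is the point of routing the overwrite through a temporary partition.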
\ No newline at end of file diff --git a/docs/en/administrator-guide/backup-restore.md b/docs/en/administrator-guide/backup-restore.md deleted file mode 100644 index 4e9ce83513..0000000000 --- a/docs/en/administrator-guide/backup-restore.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -{ - "title": "Backup and Recovery", - "language": "en" -} ---- - - - -# Backup and Recovery - -Doris supports the backup of current data in the form of files to remote storage systems via broker. The data can then be restored from the remote storage system to any Doris cluster by the restore command. With this feature, Doris can support regular snapshot backups of data. It can also be used to migrate data between different clusters. - -This feature requires Doris version 0.8.2+ - -Using this function, brokers corresponding to remote storage need to be deployed. Such as BOS, HDFS, etc. You can view the currently deployed broker through `SHOW BROKER;` - -## Brief Principle Description - -### Backup - -The backup operation is to upload the data of the specified table or partition directly to the remote warehouse in the form of files stored by Doris for storage. When a user submits a Backup request, the following actions will be done within the system: - -1. Snapshot and snapshot upload - - The snapshot phase takes a snapshot of the specified table or partition data file. Later, backups are all snapshots. After the snapshot, changes to tables, imports, and other operations no longer affect the results of the backup. Snapshots only produce a hard link to the current data file, which takes very little time. Once the snapshots are completed, they are uploaded one by one. Snapshot upload is done concurrently by each Backend. - -2. Metadata preparation and upload - - After the data file snapshot is uploaded, Frontend first writes the corresponding metadata to the local file, and then uploads the local metadata file to the remote warehouse through broker. Finish the final backup job. - -3. Dynamic partition table description - - If the table is a dynamic partition table, the dynamic partition attribute will be automatically disabled after backup. When restoring, you need to manually enable the dynamic partition attribute of the table. The command is as follows: - - ```sql - ALTER TABLE tbl1 SET ("dynamic_partition.enable"="true") - ```` - -### Restore - -Recovery operations need to specify a backup that already exists in a remote repository, and then restore the backup content to the local cluster. When a user submits a Restore request, the following actions will be done within the system: - -1. Create corresponding metadata locally - - This step starts by creating structures such as restoring the corresponding table partitions in the local cluster. When created, the table is visible, but not accessible. - -2. Local snapshot - - This step is to take a snapshot of the table created in the previous step. This is actually an empty snapshot (because the tables just created have no data), and its main purpose is to generate the corresponding snapshot directory on the Backend for receiving the snapshot files downloaded from the remote repository later. - -3. Download snapshots - - The snapshot files in the remote warehouse are downloaded to the corresponding snapshot directory generated in the previous step. This step is done concurrently by each backend. - -4. Effective snapshot - - When the snapshot download is complete, we map each snapshot to the metadata of the current local table. 
These snapshots are then reloaded to take effect and complete the final recovery operation. - -## Best Practices - -### Backup - -We currently support full backup at the minimum partition granularity (incremental backup may be supported in future versions). If data need to be backed up regularly, first of all, it is necessary to plan the partition and bucket allocation of tables reasonably, such as partitioning according to time. Then in the subsequent run process, periodic data backup is performed according to partition granularity. - -### Data migration - -Users can first backup the data to the remote warehouse, and then restore the data to another cluster through the remote warehouse to complete data migration. Because data backup is done in the form of snapshots, new imported data after the snapshot phase of the backup job will not be backed up. Therefore, after the snapshot is completed, the data imported on the original cluster needs to be imported on the new cluster as well until the recovery job is completed. - -It is suggested that the new and old clusters be imported in parallel for a period of time after the migration is completed. After completing data and business correctness checks, the business is migrated to the new cluster. - -## Highlights - -1. Backup and recovery-related operations are currently only allowed to be performed by users with ADMIN privileges. -2. Within a database, only one backup or recovery job is allowed to be performed. -3. Both backup and recovery support the operation at the minimum partition level. When the table has a large amount of data, it is recommended to perform partition-by-partition to reduce the cost of failed retries. -4. Because backup and recovery operations, the operation is the actual data files. So when there are too many fragments of a table or too many small versions of a fragment, it may take a long time to backup or restore even if the total amount of data is very small. Users can estimate job execution time by `SHOW PARTITIONS FROM table_name;`, and `SHOW TABLET FROM table_name;`, viewing the number of partitions and the number of file versions of each partition. The number of files has a great impact on the execution time of the job, so it is suggested that the partition buckets should be planned reasonably in order to avoid excessive partitioning. -5. When viewing the job status through `SHOW BACKUP` or `SHOW RESTORE`. It is possible to see an error message in the `TaskErrMsg` column. But as long as the `State` column does not -`CANCELLED`, that means the job is still going on. These Tasks may succeed in retrying. Of course, some Task errors can also directly lead to job failure. -6. If the recovery operation is a coverage operation (specifying the recovery data to an existing table or partition), then starting from the `COMMIT` phase of the recovery operation, the data covered on the current cluster may not be restored. At this time, if the recovery operation fails or is cancelled, it may cause the previous data to be damaged and inaccessible. In this case, the recovery operation can only be performed again and wait for the job to complete. Therefore, we recommend that if it is not necessary, try not to use coverage to recover data unless it is confirmed that the current data is no longer in use. - -## Relevant orders - -The commands related to the backup recovery function are as follows. The following commands, you can use `help cmd;'to view detailed help after connecting Doris through mysql-client. - -1. 
CREATE REPOSITORY - - Create a remote warehouse Path for backup or recovery. This command needs to access the remote storage through the Broker. Different brokers need to provide different parameters. For details, please refer to [Broker Document] (broker.md), or directly back up to the remote storage supporting AWS S3 protocol through S3 protocol. For details, please refer to [CREATE REPOSITORY DOCUMENT] (../sql-reference/sql-statements/Data%20Definition/CREATE%20REPOSITORY.md) - - -1. BACKUP - - Perform a backup operation. - -3. SHOW BACKUP - - View the execution of the last backup job, including: - - * JobId: ID of this backup job. - * SnapshotName: User-specified name of this backup job (Label). - * DbName: The database corresponding to the backup job. - * State: The current stage of the backup job: - * PENDING: The initial state of the job. - * SNAPSHOTING: Snapshot operation is in progress. - * UPLOAD_SNAPSHOT: The snapshot is over and ready to upload. - * UPLOADING: Uploading snapshots. - * SAVE_META: Metadata files are being generated locally. - * UPLOAD_INFO: Upload metadata files and information for this backup job. - * FINISHED: The backup is complete. - * CANCELLED: Backup failed or cancelled. - * Backup Objs: List of tables and partitions involved in this backup. - * CreateTime: Job creation time. - * Snapshot Finished Time: Snapshot completion time. - * Upload Finished Time: Snapshot upload completion time. - * FinishedTime: The completion time of this assignment. - * Unfinished Tasks: In the `SNAPSHOTTING`, `UPLOADING` and other stages, there will be multiple sub-tasks at the same time, the current stage shown here, the task ID of the unfinished sub-tasks. - * TaskErrMsg: If there is a sub-task execution error, the error message corresponding to the sub-task will be displayed here. - * Status: It is used to record some status information that may appear during the whole operation. - * Timeout: The timeout time of a job in seconds. - -4. SHOW SNAPSHOT - - View the backup that already exists in the remote warehouse. - - * Snapshot: The name of the backup specified at the time of backup (Label). - * Timestamp: Backup timestamp. - * Status: Is the backup normal? - - If the where clause is specified after `SHOW SNAPSHOT', more detailed backup information can be displayed. - - * Database: The database corresponding to backup. - * Details: Shows the complete data directory structure of the backup. - -5. RESTORE - - Perform a recovery operation. - -6. SHOW RESTORE - - View the execution of the last restore job, including: - - * JobId: ID of this resumption job. - * Label: The name of the backup in the user-specified warehouse (Label). - * Timestamp: The timestamp for backup in a user-specified warehouse. - * DbName: Restore the database corresponding to the job. - * State: The current stage of the recovery operation: - * PENDING: The initial state of the job. - * SNAPSHOTING: A snapshot of a new local table is in progress. - * DOWNLOAD: The download snapshot task is being sent. - * DOWNLOADING: Snapshot is downloading. - * COMMIT: Prepare to take effect the downloaded snapshot. - * COMMITTING: The downloaded snapshot is in effect. - * FINISHED: Recovery is complete. - * CANCELLED: Recovery failed or cancelled. - * AllowLoad: Is import allowed during recovery? - * ReplicationNum: Restores the specified number of copies. - * Restore Objs: List of tables and partitions involved in this recovery. - * CreateTime: Job creation time. 
- * MetaPreparedTime: Completion time of local metadata generation. - * Snapshot Finished Time: Local snapshot completion time. - * Download Finished Time: The download completion time of the remote snapshot. - * FinishedTime: The completion time of this job. - * Unfinished Tasks: During stages such as `SNAPSHOTTING`, `DOWNLOADING` and `COMMITTING`, there are multiple sub-tasks running at the same time. This column shows the task IDs of the unfinished sub-tasks in the current stage. - * TaskErrMsg: If a sub-task fails, the error message of that sub-task is displayed here. - * Status: Records status information that may appear during the whole job. - * Timeout: The timeout of the job, in seconds. - -7. CANCEL BACKUP - - Cancel the backup job currently being performed. - -8. CANCEL RESTORE - - Cancel the recovery job currently being performed. - -9. DROP REPOSITORY - - Delete a created remote repository. Dropping a repository only deletes its mapping in Doris; it does not delete the actual data in the repository. diff --git a/docs/en/administrator-guide/block-rule/sql-block.md b/docs/en/administrator-guide/block-rule/sql-block.md deleted file mode 100644 index 0b167ae17c..0000000000 --- a/docs/en/administrator-guide/block-rule/sql-block.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -{ -"title": "SQL Block Rule", -"language": "en" -} ---- - - - -# SQL Block Rule - -This function is only used to restrict query statements; it does not restrict the execution of explain statements. -SQL block rules are supported at the user level: - -1. deny specified SQL by regular expression - -2. check whether a SQL statement reaches one of the limits set by partition_num, tablet_num and cardinality - - partition_num, tablet_num and cardinality can be set together; once any one of them is reached, the SQL statement will be blocked. - -## Rule - -SQL block rule CRUD -- create SQL block rule - - sql: Regex pattern; special characters need to be escaped. "NULL" by default - - sqlHash: SQL hash value, used for exact matching. It is printed in fe.audit.log. Only one of sql and sqlHash can be set in a rule. "NULL" by default - - partition_num: Max number of partitions that a scan node may scan, 0L by default - - tablet_num: Max number of tablets that a scan node may scan, 0L by default - - cardinality: An inaccurate number of rows scanned by a scan node, 0L by default - - global: Whether the rule is in effect globally (for all users), false by default - - enable: Whether to enable the block rule, true by default -```sql -CREATE SQL_BLOCK_RULE test_rule -PROPERTIES( - "sql"="select * from order_analysis", - "global"="false", - "enable"="true", - "sqlHash"="" -) -``` -When we execute the SQL defined in the rule above, an exception will be returned. An example is as follows: -```sql -mysql> select * from order_analysis; -ERROR 1064 (HY000): errCode = 2, detailMessage = sql match regex sql block rule: order_analysis_rule -``` - -- create test_rule2, which limits the maximum number of scanned partitions to 30 and the maximum scan cardinality to 10 billion rows.
As shown in the following example: -```sql -CREATE SQL_BLOCK_RULE test_rule2 PROPERTIES("partition_num" = "30", "cardinality"="10000000000","global"="false","enable"="true") -``` - -- show configured SQL block rules, or show all rules if you do not specify a rule name - -```sql -SHOW SQL_BLOCK_RULE [FOR RULE_NAME] -``` -- alter SQL block rule. Any one of sql/sqlHash/global/enable/partition_num/tablet_num/cardinality can be changed - - sql and sqlHash cannot both be set. This means that if sql or sqlHash is set in a rule, the other property can never be altered - - sql/sqlHash and partition_num/tablet_num/cardinality cannot be set together. For example, if partition_num is set in a rule, then sql or sqlHash can never be altered. -```sql -ALTER SQL_BLOCK_RULE test_rule PROPERTIES("sql"="select \\* from test_table","enable"="true") -``` - -``` -ALTER SQL_BLOCK_RULE test_rule2 PROPERTIES("partition_num" = "10","tablet_num"="300","enable"="true") -``` - -- drop SQL block rule. Multiple rules are supported, separated by `,` -```sql -DROP SQL_BLOCK_RULE test_rule1,test_rule2 -``` - -## User bind rules -If global=false is configured, the rules need to be bound to the specified users, with multiple rules separated by ',' -```sql -SET PROPERTY [FOR 'jack'] 'sql_block_rules' = 'test_rule1,test_rule2' -``` diff --git a/docs/en/administrator-guide/broker.md b/docs/en/administrator-guide/broker.md deleted file mode 100644 index 0bac53bdc6..0000000000 --- a/docs/en/administrator-guide/broker.md +++ /dev/null @@ -1,289 +0,0 @@ ---- -{ - "title": "Broker", - "language": "en" -} ---- - - - -# Broker - -Broker is an optional process in the Doris cluster. It is mainly used to support Doris in reading and writing files or directories on remote storage systems, such as HDFS, BOS, and AFS. - -Broker provides services through an RPC service port. It is a stateless JVM process that is responsible for encapsulating some POSIX-like file operations for reading and writing remote storage, such as open, pread, pwrite, and so on. -In addition, the Broker does not record any other information, so the connection information, file information, permission information, and so on of the remote storage need to be passed to the Broker process through parameters in the RPC call in order for the Broker to read and write files correctly. - -Broker only acts as a data channel and does not participate in any computation, so it takes up little memory. Usually one or more Broker processes are deployed in a Doris system, and Brokers of the same type form a group identified by a **Broker name**. - -Broker's position in the Doris system architecture is as follows: - -``` -+----+ +----+ -| FE | | BE | -+-^--+ +--^-+ - | | - | | -+-v---------v-+ -| Broker | -+------^------+ - | - | -+------v------+ -|HDFS/BOS/AFS | -+-------------+ -``` - -This document mainly introduces the parameters that the Broker needs when accessing different remote storage systems, such as connection information, -authorization information, and so on. - -## Supported Storage Systems - -Different types of brokers support different storage systems. - -1. Community HDFS - - * Supports simple authentication access - * Supports kerberos authentication access - * Supports HDFS HA mode access - -2. Baidu HDFS / AFS (not supported by open source version) - - * Supports UGI simple authentication access - -3. Baidu Object Storage BOS (not supported by open source version) - - * Supports AK / SK authentication access - -## Functions provided by Broker - -1.
Broker Load - - The Broker Load function reads the file data on the remote storage through the Broker process and imports it into Doris. Examples are as follows: - - ``` - LOAD LABEL example_db.label6 - ( - DATA INFILE("bos://my_bucket/input/file") - INTO TABLE `my_table` - ) - WITH BROKER "broker_name" - ( - "bos_endpoint" = "http://bj.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyy" - ) - ``` - - `WITH BROKER` and following Property Map are used to provide Broker's related information. - -2. Export - - The Export function export the data stored in Doris to a file stored in remote storage in text format through Broker process. Examples are as follows: - - ``` - EXPORT TABLE testTbl - TO "hdfs://hdfs_host:port/a/b/c" - WITH BROKER "broker_name" - ( - "username" = "xxx", - "password" = "yyy" - ); - ``` - - `WITH BROKER` and following Property Map are used to provide Broker's related information. - -3. Create Repository - - When users need to use the backup and restore function, they need to first create a "repository" with the `CREATE REPOSITORY` command,and the broker metadata and related information are recorded in the warehouse metadata. - Subsequent backup and restore operations will use Broker to back up data to this warehouse, or read data from this warehouse to restore to Doris. Examples are as follows: - - ``` - CREATE REPOSITORY `bos_repo` - WITH BROKER `broker_name` - ON LOCATION "bos://doris_backup" - PROPERTIES - ( - "bos_endpoint" = "http://gz.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyy" - ); - ``` - - `WITH BROKER` and following Property Map are used to provide Broker's related information. - - -## Broker Information - -Broker information includes two parts: ** Broker name ** and ** Certification information **. The general syntax is as follows: - -``` -WITH BROKER "broker_name" -( - "username" = "xxx", - "password" = "yyy", - "other_prop" = "prop_value", - ... -); -``` - -### Broker Name - -Usually the user needs to specify an existing Broker Name through the `WITH BROKER" broker_name "` clause in the operation command. -Broker Name is a name that the user specifies when adding a Broker process through the ALTER SYSTEM ADD BROKER command. -A name usually corresponds to one or more broker processes. Doris selects available broker processes based on the name. -You can use the `SHOW BROKER` command to view the Brokers that currently exist in the cluster. - -**Note: Broker Name is just a user-defined name and does not represent the type of Broker.** - -### Certification Information - -Different broker types and different access methods need to provide different authentication information. -Authentication information is usually provided as a Key-Value in the Property Map after `WITH BROKER" broker_name "`. - -#### Community HDFS - -1. Simple Authentication - - Simple authentication means that Hadoop configures `hadoop.security.authentication` to` simple`. - - Use system users to access HDFS. Or add in the environment variable started by Broker: ```HADOOP_USER_NAME```. - - ``` - ( - "username" = "user", - "password" = "" - ); - ``` - - Just leave the password blank. - -2. Kerberos Authentication - - The authentication method needs to provide the following information:: - - * `hadoop.security.authentication`: Specify the authentication method as kerberos. - * `kerberos_principal`: Specify the principal of kerberos. 
- * `kerberos_keytab`: Specify the path to the keytab file for kerberos. The file must be an absolute path to a file on the server where the broker process is located. And can be accessed by the Broker process. - * `kerberos_keytab_content`: Specify the content of the keytab file in kerberos after base64 encoding. You can choose one of these with `kerberos_keytab` configuration. - - Examples are as follows: - - ``` - ( - "hadoop.security.authentication" = "kerberos", - "kerberos_principal" = "doris@YOUR.COM", - "kerberos_keytab" = "/home/doris/my.keytab" - ) - ``` - ``` - ( - "hadoop.security.authentication" = "kerberos", - "kerberos_principal" = "doris@YOUR.COM", - "kerberos_keytab_content" = "ASDOWHDLAWIDJHWLDKSALDJSDIWALD" - ) - ``` - If Kerberos authentication is used, the [krb5.conf](https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html) file is required when deploying the Broker process. - The krb5.conf file contains Kerberos configuration information, Normally, you should install your krb5.conf file in the directory /etc. You can override the default location by setting the environment variable KRB5_CONFIG. - An example of the contents of the krb5.conf file is as follows: - ``` - [libdefaults] - default_realm = DORIS.HADOOP - default_tkt_enctypes = des3-hmac-sha1 des-cbc-crc - default_tgs_enctypes = des3-hmac-sha1 des-cbc-crc - dns_lookup_kdc = true - dns_lookup_realm = false - - [realms] - DORIS.HADOOP = { - kdc = kerberos-doris.hadoop.service:7005 - } - ``` - -3. HDFS HA Mode - - This configuration is used to access HDFS clusters deployed in HA mode. - - * `dfs.nameservices`: Specify the name of the hdfs service, custom, such as "dfs.nameservices" = "my_ha". - * `dfs.ha.namenodes.xxx`: Custom namenode names. Multiple names are separated by commas, where xxx is the custom name in `dfs.nameservices`, such as" dfs.ha.namenodes.my_ha "=" my_nn ". - * `dfs.namenode.rpc-address.xxx.nn`: Specify the rpc address information of namenode, Where nn represents the name of the namenode configured in `dfs.ha.namenodes.xxx`, such as: "dfs.namenode.rpc-address.my_ha.my_nn" = "host:port". - * `dfs.client.failover.proxy.provider`: Specify the provider for the client to connect to the namenode. The default is: org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider. - - Examples are as follows: - - ``` - ( - "dfs.nameservices" = "my_ha", - "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2", - "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port", - "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port", - "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ) - ``` - - The HA mode can be combined with the previous two authentication methods for cluster access. If you access HA HDFS with simple authentication: - - ``` - ( - "username"="user", - "password"="passwd", - "dfs.nameservices" = "my_ha", - "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2", - "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port", - "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port", - "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ) - ``` - The configuration for accessing the HDFS cluster can be written to the hdfs-site.xml file. When users use the Broker process to read data from the HDFS cluster, they only need to fill in the cluster file path and authentication information. 
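To illustrate how the HA properties above combine with simple authentication in practice, the following Broker Load sketch puts both sets of properties into the `WITH BROKER` clause. This is only a hedged example: the label, database, table, file path, and broker name are placeholders for illustration, not values taken from this document.

```sql
LOAD LABEL example_db.label_ha_demo
(
    -- the custom nameservice name "my_ha" is used as the authority in the HDFS path
    DATA INFILE("hdfs://my_ha/user/doris/input/file.txt")
    INTO TABLE `my_table`
)
WITH BROKER "broker_name"
(
    -- simple authentication
    "username" = "user",
    "password" = "passwd",
    -- HDFS HA configuration
    "dfs.nameservices" = "my_ha",
    "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2",
    "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port",
    "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port",
    "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
);
```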
- -#### Baidu Object Storage BOS - -**(Open source version is not supported)** - -1. Access via AK / SK - - * AK/SK: Access Key and Secret Key. You can check the user's AK / SK in Baidu Cloud Security Certification Center. - * Region Endpoint: Endpoint of the BOS region. - * For the regions supported by BOS and corresponding Endpoints, please see [Get access domain name](https://cloud.baidu.com/doc/BOS/s/Ck1rk80hn#%E8%8E%B7%E5%8F%96%E8%AE %BF%E9%97%AE%E5%9F%9F%E5%90%8D) - - Examples are as follows: - - ``` - ( - "bos_endpoint" = "http://bj.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyyyyyyyy" - ) - ``` - -#### Baidu HDFS/AFS - -**(Open source version is not supported)** - -Baidu AFS and HDFS only support simple authentication access using UGI. Examples are as follows: - -``` -( - "username" = "user", - "password" = "passwd" -); -``` - -User and passwd are UGI configurations for Hadoop. diff --git a/docs/en/administrator-guide/bucket-shuffle-join.md b/docs/en/administrator-guide/bucket-shuffle-join.md deleted file mode 100644 index a2edaef4fc..0000000000 --- a/docs/en/administrator-guide/bucket-shuffle-join.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -{ - "title": "Bucket Shuffle Join", - "language": "en" -} ---- - - - -# Bucket Shuffle Join - -Bucket Shuffle Join is a new function officially added in Doris 0.14. The purpose is to provide local optimization for some join queries to reduce the time-consuming of data transmission between nodes and speed up the query. - -It's design, implementation can be referred to [ISSUE 4394](https://github.com/apache/incubator-doris/issues/4394). - -## Noun Interpretation - -* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. -* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. -* Left table: the left table in join query. Perform probe expr. The order can be adjusted by join reorder. -* Right table: the right table in join query. Perform build expr The order can be adjusted by join reorder. - -## Principle -The conventional distributed join methods supported by Doris is: `Shuffle Join, Broadcast Join`. Both of these join will lead to some network overhead. - -For example, there are join queries for table A and table B. the join method is hashjoin. The cost of different join types is as follows: -* **Broadcast Join**: If table a has three executing hashjoinnodes according to the data distribution, table B needs to be sent to the three HashJoinNode. Its network overhead is `3B `, and its memory overhead is `3B`. -* **Shuffle Join**: Shuffle join will distribute the data of tables A and B to the nodes of the cluster according to hash calculation, so its network overhead is `A + B` and memory overhead is `B`. - -The data distribution information of each Doris table is saved in FE. If the join statement hits the data distribution column of the left table, we should use the data distribution information to reduce the network and memory overhead of the join query. This is the source of the idea of bucket shuffle join. - -![image.png](/images/bucket_shuffle_join.png) - -The picture above shows how the Bucket Shuffle Join works. The SQL query is A table join B table. The equivalent expression of join hits the data distribution column of A. According to the data distribution information of table A. Bucket Shuffle Join sends the data of table B to the corresponding data storage and calculation node of table A. 
The cost of Bucket Shuffle Join is as follows: - -* network cost: ``` B < min(3B, A + B) ``` - -* memory cost: ``` B <= min(3B, B) ``` - -Therefore, compared with Broadcast Join and Shuffle Join, Bucket shuffle join has obvious performance advantages. It reduces the time-consuming of data transmission between nodes and the memory cost of join. Compared with Doris's original join method, it has the following advantages - -* First of all, Bucket Shuffle Join reduces the network and memory cost which makes some join queries have better performance. Especially when FE can perform partition clipping and bucket clipping of the left table. -* Secondly, unlike Colorate Join, it is not intrusive to the data distribution of tables, which is transparent to users. There is no mandatory requirement for the data distribution of the table, which is not easy to lead to the problem of data skew. -* Finally, it can provide more optimization space for join reorder. - -## Usage - -### Set session variable - -Set session variable `enable_bucket_shuffle_join` to `true`, FE will automatically plan queries that can be converted to Bucket Shuffle Join. - -``` -set enable_bucket_shuffle_join = true; -``` - -In FE's distributed query planning, the priority order is Colorate Join -> Bucket Shuffle Join -> Broadcast Join -> Shuffle Join. However, if the user explicitly hints the type of join, for example: - -``` -select * from test join [shuffle] baseall on test.k1 = baseall.k1; -``` -the above order of preference will not take effect. - -The session variable is set to `true` by default in version 0.14, while it needs to be set to `true` manually in version 0.13. - -### View the type of join - -You can use the `explain` command to check whether the join is a Bucket Shuffle Join - -``` -| 2:HASH JOIN | -| | join op: INNER JOIN (BUCKET_SHUFFLE) | -| | hash predicates: | -| | colocate: false, reason: table not in the same group | -| | equal join conjunct: `test`.`k1` = `baseall`.`k1` -``` - -The join type indicates that the join method to be used is:`BUCKET_SHUFFLE`. - -## Planning rules of Bucket Shuffle Join - -In most scenarios, users only need to turn on the session variable by default to transparently use the performance improvement brought by this join method. However, if we understand the planning rules of Bucket Shuffle Join, we can use it to write more efficient SQL. - -* Bucket Shuffle Join only works when the join condition is equivalent. The reason is similar to Colorate Join. They all rely on hash to calculate the determined data distribution. -* The bucket column of two tables is included in the equivalent join condition. When the bucket column of the left table is an equivalent join condition, it has a high probability of being planned as a Bucket Shuffle Join. -* Because the hash values of different data types have different calculation results. Bucket Shuffle Join requires that the bucket column type of the left table and the equivalent join column type of the right table should be consistent, otherwise the corresponding planning cannot be carried out. -* Bucket Shuffle Join only works on Doris native OLAP tables. For ODBC, MySQL, ES External Table, when they are used as left tables, they cannot be planned as Bucket Shuffle Join. -* For partitioned tables, because the data distribution rules of each partition may be different, the Bucket Shuffle Join can only guarantee that the left table is a single partition. 
Therefore, in SQL execution, we need to use the `where` condition as far as possible to make the partition clipping policy effective. -* If the left table is a colorate table, the data distribution rules of each partition are determined. So the bucket shuffle join can perform better on the colorate table. diff --git a/docs/en/administrator-guide/colocation-join.md b/docs/en/administrator-guide/colocation-join.md deleted file mode 100644 index 390e2f27e6..0000000000 --- a/docs/en/administrator-guide/colocation-join.md +++ /dev/null @@ -1,409 +0,0 @@ ---- -{ - "title": "Colocation Join", - "language": "en" -} ---- - - - -# Colocation Join - -Colocation Join is a new feature introduced in Doris 0.9. The purpose of this paper is to provide local optimization for some Join queries to reduce data transmission time between nodes and speed up queries. - -The original design, implementation and effect can be referred to [ISSUE 245](https://github.com/apache/incubator-doris/issues/245). - -The Colocation Join function has undergone a revision, and its design and use are slightly different from the original design. This document mainly introduces Colocation Join's principle, implementation, usage and precautions. - -## Noun Interpretation - -* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. -* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. -* Colocation Group (CG): A CG contains one or more tables. Tables within the same group have the same Colocation Group Schema and the same data fragmentation distribution. -* Colocation Group Schema (CGS): Used to describe table in a CG and general Schema information related to Colocation. Including bucket column type, bucket number and copy number. - -## Principle - -The Colocation Join function is to make a CG of a set of tables with the same CGS. Ensure that the corresponding data fragments of these tables will fall on the same BE node. When tables in CG perform Join operations on bucket columns, local data Join can be directly performed to reduce data transmission time between nodes. - -The data of a table will eventually fall into a barrel according to the barrel column value Hash and the number of barrels modeled. Assuming that the number of buckets in a table is 8, there are eight buckets `[0, 1, 2, 3, 4, 5, 6, 7] `Buckets'. We call such a sequence a `Buckets Sequence`. Each Bucket has one or more Tablets. When a table is a single partitioned table, there is only one Tablet in a Bucket. If it is a multi-partition table, there will be more than one. - -In order for a table to have the same data distribution, the table in the same CG must ensure the following attributes are the same: - -1. Barrel row and number of barrels - - Bucket column, that is, the column specified in `DISTRIBUTED BY HASH (col1, col2,...)'in the table building statement. Bucket columns determine which column values are used to Hash data from a table into different Tablets. Tables in the same CG must ensure that the type and number of barrel columns are identical, and the number of barrels is identical, so that the data fragmentation of multiple tables can be controlled one by one. - -2. Number of copies - - The number of copies of all partitions of all tables in the same CG must be the same. If inconsistent, there may be a copy of a Tablet, and there is no corresponding copy of other table fragments on the same BE. 
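To make the two requirements above concrete, here is a hedged sketch of two tables that could share a Colocation Group: both hash on a bucket column of the same type into the same number of buckets and use the same replication number, while their partitioning is allowed to differ. The table and group names are placeholders for illustration only.

```sql
-- both tables hash on an int column into 8 buckets with 3 replicas,
-- so they satisfy the same Colocation Group Schema
CREATE TABLE t1 (k1 date, k2 int, v1 int SUM)
AGGREGATE KEY(k1, k2)
PARTITION BY RANGE(k1) (PARTITION p1 VALUES LESS THAN ('2022-01-01'))
DISTRIBUTED BY HASH(k2) BUCKETS 8
PROPERTIES ("colocate_with" = "demo_group", "replication_num" = "3");

CREATE TABLE t2 (k1 datetime, k2 int, v1 double SUM)
AGGREGATE KEY(k1, k2)
DISTRIBUTED BY HASH(k2) BUCKETS 8
PROPERTIES ("colocate_with" = "demo_group", "replication_num" = "3");
```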
- -Tables in the same CG do not require consistency in the number, scope, and type of partition columns. - -After fixing the number of bucket columns and buckets, the tables in the same CG will have the same Buckets Sequence. The number of replicas determines the number of replicas of Tablets in each bucket, which BE they are stored on. Suppose that Buckets Sequence is `[0, 1, 2, 3, 4, 5, 6, 7] `, and that BE nodes have `[A, B, C, D] `4. A possible distribution of data is as follows: - -``` -+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+ -| 0 | | 1 | | 2 | | 3 | | 4 | | 5 | | 6 | | 7 | -+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+ -| A | | B | | C | | D | | A | | B | | C | | D | -| | | | | | | | | | | | | | | | -| B | | C | | D | | A | | B | | C | | D | | A | -| | | | | | | | | | | | | | | | -| C | | D | | A | | B | | C | | D | | A | | B | -+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+ -``` - -The data of all tables in CG will be uniformly distributed according to the above rules, which ensures that the data with the same barrel column value are on the same BE node, and local data Join can be carried out. - -## Usage - -### Establishment of tables - -When creating a table, you can specify the attribute `"colocate_with"="group_name"` in `PROPERTIES`, which means that the table is a Colocation Join table and belongs to a specified Colocation Group. - -Examples: - -``` -CREATE TABLE tbl (k1 int, v1 int sum) -DISTRIBUTED BY HASH(k1) -BUCKETS 8 -PROPERTIES( - "colocate_with" = "group1" -); -``` - -If the specified group does not exist, Doris automatically creates a group that contains only the current table. If the Group already exists, Doris checks whether the current table satisfies the Colocation Group Schema. If satisfied, the table is created and added to the Group. At the same time, tables create fragments and replicas based on existing data distribution rules in Groups. -Group belongs to a database, and its name is unique in a database. Internal storage is the full name of Group `dbId_groupName`, but users only perceive groupName. - -### Delete table - -When the last table in Group is deleted completely (deleting completely means deleting from the recycle bin). Usually, when a table is deleted by the `DROP TABLE` command, it will be deleted after the default one-day stay in the recycle bin, and the group will be deleted automatically. - -### View Group - -The following command allows you to view the existing Group information in the cluster. - -``` -SHOW PROC '/colocation_group'; - -+-------------+--------------+--------------+------------+----------------+----------+----------+ -| GroupId | GroupName | TableIds | BucketsNum | ReplicationNum | DistCols | IsStable | -+-------------+--------------+--------------+------------+----------------+----------+----------+ -| 10005.10008 | 10005_group1 | 10007, 10040 | 10 | 3 | int(11) | true | -+-------------+--------------+--------------+------------+----------------+----------+----------+ -``` - -* GroupId: The unique identity of a group's entire cluster, with DB ID in the first half and group ID in the second half. -* GroupName: The full name of Group. -* Tablet Ids: The group contains a list of Tables'ID. -* Buckets Num: Number of barrels. -* Replication Num: Number of copies. -* DistCols: Distribution columns, -* IsStable: Is the group stable (for the definition of stability, see section `Collocation replica balancing and repair'). 
- -You can further view the data distribution of a group by following commands: - -``` -SHOW PROC '/colocation_group/10005.10008'; - -+-------------+---------------------+ -| BucketIndex | BackendIds | -+-------------+---------------------+ -| 0 | 10004, 10002, 10001 | -| 1 | 10003, 10002, 10004 | -| 2 | 10002, 10004, 10001 | -| 3 | 10003, 10002, 10004 | -| 4 | 10002, 10004, 10003 | -| 5 | 10003, 10002, 10001 | -| 6 | 10003, 10004, 10001 | -| 7 | 10003, 10004, 10002 | -+-------------+---------------------+ -``` - -* BucketIndex: Subscript to the bucket sequence. -* Backend Ids: A list of BE node IDs where data fragments are located in buckets. - -> The above commands require ADMIN privileges. Normal user view is not supported at this time. - -### Modify Colocate Group - -You can modify the Colocation Group property of a table that has been created. Examples: - -`ALTER TABLE tbl SET ("colocate_with" = "group2");` - -* If the table has not previously specified a Group, the command checks the Schema and adds the table to the Group (if the Group does not exist, it will be created). -* If other groups are specified before the table, the command first removes the table from the original group and adds a new group (if the group does not exist, it will be created). - -You can also delete the Colocation attribute of a table by following commands: - -`ALTER TABLE tbl SET ("colocate_with" = "");` - -### Other related operations - -When an ADD PARTITION is added to a table with a Colocation attribute and the number of copies is modified, Doris checks whether the modification violates the Colocation Group Schema and rejects it if it does. - -## Colocation Duplicate Balancing and Repair - -Copy distribution of Colocation tables needs to follow the distribution specified in Group, so it is different from common fragmentation in replica repair and balancing. - -Group itself has a Stable attribute, when Stable is true, which indicates that all fragments of the table in the current Group are not changing, and the Colocation feature can be used normally. When Stable is false, it indicates that some tables in Group are being repaired or migrated. At this time, Colocation Join of related tables will degenerate into ordinary Join. - -### Replica Repair - -Copies can only be stored on specified BE nodes. So when a BE is unavailable (downtime, Decommission, etc.), a new BE is needed to replace it. Doris will first look for the BE with the lowest load to replace it. After replacement, all data fragments on the old BE in the Bucket will be repaired. During the migration process, Group is marked Unstable. - -### Duplicate Equilibrium - -Doris will try to distribute the fragments of the Collocation table evenly across all BE nodes. For the replica balancing of common tables, the granularity is single replica, that is to say, it is enough to find BE nodes with lower load for each replica alone. The equilibrium of the Colocation table is at the Bucket level, where all replicas within a Bucket migrate together. We adopt a simple equalization algorithm, which distributes Buckets Sequence evenly on all BEs, regardless of the actual size of the replicas, but only according to the number of replicas. Specific algorithms can be referred to the code annotations in `ColocateTableBalancer.java`. - -> Note 1: Current Colocation replica balancing and repair algorithms may not work well for heterogeneous deployed Oris clusters. 
The so-called heterogeneous deployment, that is, the BE node's disk capacity, number, disk type (SSD and HDD) is inconsistent. In the case of heterogeneous deployment, small BE nodes and large BE nodes may store the same number of replicas. -> -> Note 2: When a group is in an Unstable state, the Join of the table in it will degenerate into a normal Join. At this time, the query performance of the cluster may be greatly reduced. If you do not want the system to balance automatically, you can set the FE configuration item `disable_colocate_balance` to prohibit automatic balancing. Then open it at the right time. (See Section `Advanced Operations` for details) - -## Query - -The Colocation table is queried in the same way as ordinary tables, and users do not need to perceive Colocation attributes. If the Group in which the Colocation table is located is in an Unstable state, it will automatically degenerate to a normal Join. - -Examples are given to illustrate: - -Table 1: - -``` -CREATE TABLE `tbl1` ( - `k1` date NOT NULL COMMENT "", - `k2` int(11) NOT NULL COMMENT "", - `v1` int(11) SUM NOT NULL COMMENT "" -) ENGINE=OLAP -AGGREGATE KEY(`k1`, `k2`) -PARTITION BY RANGE(`k1`) -( - PARTITION p1 VALUES LESS THAN ('2019-05-31'), - PARTITION p2 VALUES LESS THAN ('2019-06-30') -) -DISTRIBUTED BY HASH(`k2`) BUCKETS 8 -PROPERTIES ( - "colocate_with" = "group1" -); -``` - -Table 2: - -``` -CREATE TABLE `tbl2` ( - `k1` datetime NOT NULL COMMENT "", - `k2` int(11) NOT NULL COMMENT "", - `v1` double SUM NOT NULL COMMENT "" -) ENGINE=OLAP -AGGREGATE KEY(`k1`, `k2`) -DISTRIBUTED BY HASH(`k2`) BUCKETS 8 -PROPERTIES ( - "colocate_with" = "group1" -); -``` - -View the query plan: - -``` -DESC SELECT * FROM tbl1 INNER JOIN tbl2 ON (tbl1.k2 = tbl2.k2); - -+----------------------------------------------------+ -| Explain String | -+----------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS:`tbl1`.`k1` | | -| PARTITION: RANDOM | -| | -| RESULT SINK | -| | -| 2:HASH JOIN | -| | join op: INNER JOIN | -| | hash predicates: | -| | colocate: true | -| | `tbl1`.`k2` = `tbl2`.`k2` | -| | tuple ids: 0 1 | -| | | -| |----1:OlapScanNode | -| | TABLE: tbl2 | -| | PREAGGREGATION: OFF. Reason: null | -| | partitions=0/1 | -| | rollup: null | -| | buckets=0/0 | -| | cardinality=-1 | -| | avgRowSize=0.0 | -| | numNodes=0 | -| | tuple ids: 1 | -| | | -| 0:OlapScanNode | -| TABLE: tbl1 | -| PREAGGREGATION: OFF. Reason: No AggregateInfo | -| partitions=0/2 | -| rollup: null | -| buckets=0/0 | -| cardinality=-1 | -| avgRowSize=0.0 | -| numNodes=0 | -| tuple ids: 0 | -+----------------------------------------------------+ -``` - -If Colocation Join works, the Hash Join Node will show `colocate: true`. - -If not, the query plan is as follows: - -``` -+----------------------------------------------------+ -| Explain String | -+----------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS:`tbl1`.`k1` | | -| PARTITION: RANDOM | -| | -| RESULT SINK | -| | -| 2:HASH JOIN | -| | join op: INNER JOIN (BROADCAST) | -| | hash predicates: | -| | colocate: false, reason: group is not stable | -| | `tbl1`.`k2` = `tbl2`.`k2` | -| | tuple ids: 0 1 | -| | | -| |----3:EXCHANGE | -| | tuple ids: 1 | -| | | -| 0:OlapScanNode | -| TABLE: tbl1 | -| PREAGGREGATION: OFF. 
Reason: No AggregateInfo | -| partitions=0/2 | -| rollup: null | -| buckets=0/0 | -| cardinality=-1 | -| avgRowSize=0.0 | -| numNodes=0 | -| tuple ids: 0 | -| | -| PLAN FRAGMENT 1 | -| OUTPUT EXPRS: | -| PARTITION: RANDOM | -| | -| STREAM DATA SINK | -| EXCHANGE ID: 03 | -| UNPARTITIONED | -| | -| 1:OlapScanNode | -| TABLE: tbl2 | -| PREAGGREGATION: OFF. Reason: null | -| partitions=0/1 | -| rollup: null | -| buckets=0/0 | -| cardinality=-1 | -| avgRowSize=0.0 | -| numNodes=0 | -| tuple ids: 1 | -+----------------------------------------------------+ -``` - -The HASH JOIN node displays the corresponding reason: `colocate: false, reason: group is not stable`. At the same time, an EXCHANGE node will be generated. - - -## Advanced Operations - -### FE Configuration Item - -* disable\_colocate\_relocate - -Whether to close Doris's automatic Colocation replica repair. The default is false, i.e. not closed. This parameter only affects the replica repair of the Colocation table, but does not affect the normal table. - -* disable\_colocate\_balance - -Whether to turn off automatic Colocation replica balancing for Doris. The default is false, i.e. not closed. This parameter only affects the replica balance of the Collocation table, but does not affect the common table. - -User can set these configurations at runtime. See `HELP ADMIN SHOW CONFIG;` and `HELP ADMIN SET CONFIG;`. - -* disable\_colocate\_join - -Whether to turn off the Colocation Join function or not. In 0.10 and previous versions, the default is true, that is, closed. In a later version, it will default to false, that is, open. - -* use\_new\_tablet\_scheduler - -In 0.10 and previous versions, the new replica scheduling logic is incompatible with the Colocation Join function, so in 0.10 and previous versions, if `disable_colocate_join = false`, you need to set `use_new_tablet_scheduler = false`, that is, close the new replica scheduler. In later versions, `use_new_tablet_scheduler` will be equal to true. - -###HTTP Restful API - -Doris provides several HTTP Restful APIs related to Colocation Join for viewing and modifying Colocation Group. - -The API is implemented on the FE side and accessed using `fe_host: fe_http_port`. ADMIN privileges are required. - -1. View all Colocation information for the cluster - - ``` - GET /api/colocate - - Return the internal Colocation info in JSON format: - - { - "msg": "success", - "code": 0, - "data": { - "infos": [ - ["10003.12002", "10003_group1", "10037, 10043", "1", "1", "int(11)", "true"] - ], - "unstableGroupIds": [], - "allGroupIds": [{ - "dbId": 10003, - "grpId": 12002 - }] - }, - "count": 0 - } - ``` -2. Mark Group as Stable or Unstable - - * Mark as Stable - - ``` - POST /api/colocate/group_stable?db_id=10005&group_id=10008 - - Returns: 200 - ``` - - * Mark as Unstable - - ``` - DELETE /api/colocate/group_stable?db_id=10005&group_id=10008 - - Returns: 200 - ``` - -3. Setting Data Distribution for Group - - The interface can force the number distribution of a group. - - ``` - POST /api/colocate/bucketseq?db_id=10005&group_id=10008 - - Body: - [[10004,10002],[10003,10002],[10002,10004],[10003,10002],[10002,10004],[10003,10002],[10003,10004],[10003,10004],[10003,10004],[10002,10004]] - - Returns: 200 - ``` - Body is a Buckets Sequence represented by a nested array and the ID of the BE where the fragments are distributed in each Bucket. - - Note that using this command, you may need to set the FE configuration `disable_colocate_relocate` and `disable_colocate_balance` to true. 
That is to shut down the system for automatic Colocation replica repair and balancing. Otherwise, it may be automatically reset by the system after modification. diff --git a/docs/en/administrator-guide/config/be_config.md b/docs/en/administrator-guide/config/be_config.md deleted file mode 100644 index aa5a4dce50..0000000000 --- a/docs/en/administrator-guide/config/be_config.md +++ /dev/null @@ -1,1526 +0,0 @@ ---- -{ - "title": "BE Configuration", - "language": "en" -} ---- - - - - - -# BE Configuration - -This document mainly introduces the relevant configuration items of BE. - -The BE configuration file `be.conf` is usually stored in the `conf/` directory of the BE deployment path. In version 0.14, another configuration file `be_custom.conf` will be introduced. The configuration file is used to record the configuration items that are dynamically configured and persisted by the user during operation. - -After the BE process is started, it will read the configuration items in `be.conf` first, and then read the configuration items in `be_custom.conf`. The configuration items in `be_custom.conf` will overwrite the same configuration items in `be.conf`. - -The location of the `be_custom.conf` file can be configured in `be.conf` through the `custom_config_dir` configuration item. - -## View configuration items - -Users can view the current configuration items by visiting BE's web page: - -`http://be_host:be_webserver_port/varz` - -## Set configuration items - -There are two ways to configure BE configuration items: - -1. Static configuration - - Add and set configuration items in the `conf/be.conf` file. The configuration items in `be.conf` will be read when BE starts. Configuration items not in `be.conf` will use default values. - -2. Dynamic configuration - - After BE starts, the configuration items can be dynamically set with the following commands. - - ``` - curl -X POST http://{be_ip}:{be_http_port}/api/update_config?{key}={value}' - ``` - - In version 0.13 and before, the configuration items modified in this way will become invalid after the BE process restarts. In 0.14 and later versions, the modified configuration can be persisted through the following command. The modified configuration items are stored in the `be_custom.conf` file. - - ``` - curl -X POST http://{be_ip}:{be_http_port}/api/update_config?{key}={value}&persis=true' - ``` - -## Examples - -1. Modify `max_compaction_concurrency` statically - - By adding in the `be.conf` file: - - ```max_compaction_concurrency=5``` - - Then restart the BE process to take effect the configuration. - -2. Modify `streaming_load_max_mb` dynamically - - After BE starts, the configuration item `streaming_load_max_mb` is dynamically set by the following command: - - ``` - curl -X POST http://{be_ip}:{be_http_port}/api/update_config?streaming_load_max_mb=1024 - ``` - - The return value is as follows, indicating that the setting is successful. - - ``` - { - "status": "OK", - "msg": "" - } - ``` - - The configuration will become invalid after the BE restarts. 
If you want to persist the modified results, use the following command: - - ``` - curl -X POST http://{be_ip}:{be_http_port}/api/update_config?streaming_load_max_mb=1024\&persist=true - ``` - -## Configurations - -### `alter_tablet_worker_count` - -Default: 3 - -The number of threads making schema changes - -### `base_compaction_check_interval_seconds` - -Default: 60 (s) - -BaseCompaction thread polling interval - -### `base_compaction_interval_seconds_since_last_operation` - -Default: 86400 - -One of the triggering conditions of BaseCompaction: the interval since the last BaseCompaction - -### `base_compaction_num_cumulative_deltas` - -Default: 5 - -One of the triggering conditions of BaseCompaction: The limit of the number of Cumulative files to be reached. After reaching this limit, BaseCompaction will be triggered - -### base_compaction_trace_threshold - -* Type: int32 -* Description: Threshold to logging base compaction's trace information, in seconds -* Default value: 10 - -Base compaction is a long time cost background task, this configuration is the threshold to logging trace information. Trace information in log file looks like: - -``` -W0610 11:26:33.804431 56452 storage_engine.cpp:552] Trace: -0610 11:23:03.727535 (+ 0us) storage_engine.cpp:554] start to perform base compaction -0610 11:23:03.728961 (+ 1426us) storage_engine.cpp:560] found best tablet 546859 -0610 11:23:03.728963 (+ 2us) base_compaction.cpp:40] got base compaction lock -0610 11:23:03.729029 (+ 66us) base_compaction.cpp:44] rowsets picked -0610 11:24:51.784439 (+108055410us) compaction.cpp:46] got concurrency lock and start to do compaction -0610 11:24:51.784818 (+ 379us) compaction.cpp:74] prepare finished -0610 11:26:33.359265 (+101574447us) compaction.cpp:87] merge rowsets finished -0610 11:26:33.484481 (+125216us) compaction.cpp:102] output rowset built -0610 11:26:33.484482 (+ 1us) compaction.cpp:106] check correctness finished -0610 11:26:33.513197 (+ 28715us) compaction.cpp:110] modify rowsets finished -0610 11:26:33.513300 (+ 103us) base_compaction.cpp:49] compaction finished -0610 11:26:33.513441 (+ 141us) base_compaction.cpp:56] unused rowsets have been moved to GC queue -Metrics: {"filtered_rows":0,"input_row_num":3346807,"input_rowsets_count":42,"input_rowsets_data_size":1256413170,"input_segments_num":44,"merge_rowsets_latency_us":101574444,"merged_rows":0,"output_row_num":3346807,"output_rowset_data_size":1228439659,"output_segments_num":6} -``` - -### `base_compaction_write_mbytes_per_sec` - -Default: 5(MB) - -Maximum disk write speed per second of BaseCompaction task - -### `base_cumulative_delta_ratio` - -Default: 0.3 (30%) - -One of the trigger conditions of BaseCompaction: Cumulative file size reaches the proportion of Base file - -### `be_port` - -* Type: int32 -* Description: The port of the thrift server on BE which used to receive requests from FE -* Default value: 9060 - -### `be_service_threads` - -* Type: int32 -* Description: The number of execution threads of the thrift server service on BE which represents the number of threads that can be used to execute FE requests. -* Default value: 64 - -### `brpc_max_body_size` - -This configuration is mainly used to modify the parameter `max_body_size` of brpc. - -Sometimes the query fails and an error message of `body_size is too large` will appear in the BE log. This may happen when the SQL mode is "multi distinct + no group by + more than 1T of data". - -This error indicates that the packet size of brpc exceeds the configured value. 
At this time, you can avoid this error by increasing the configuration. - -### `brpc_socket_max_unwritten_bytes` - -This configuration is mainly used to modify the parameter `socket_max_unwritten_bytes` of brpc. - -Sometimes the query fails and an error message of `The server is overcrowded` will appear in the BE log. This means there are too many messages buffered at the sender side, which may happen when the SQL needs to send a large bitmap value. You can avoid this error by increasing the configuration. - -### `transfer_data_by_brpc_attachment` - -* Type: bool -* Description: This configuration is used to control whether to transfer the RowBatch in the ProtoBuf Request to the Controller Attachment and then send it through brpc. When the length of the ProtoBuf Request exceeds 2G, an error will be reported: Bad request, error_text=[E1003]Fail to compress request. Putting the RowBatch in the Controller Attachment is faster and avoids this error. -* Default value: false - -### `brpc_num_threads` - -This configuration is mainly used to modify the number of bthreads for brpc. The default value is set to -1, which means the number of bthreads is #cpu-cores. - -Users can set this configuration to a larger value to get better QPS performance. For more information, please refer to `https://github.com/apache/incubator-brpc/blob/master/docs/cn/benchmark.md` - -### `brpc_port` - -* Type: int32 -* Description: The port of BRPC on BE, used for communication between BEs -* Default value: 9060 - -### `buffer_pool_clean_pages_limit` - -Default: 20G - -The limit on clean pages that may be kept by the buffer pool - -### `buffer_pool_limit` - -* Type: string -* Description: The largest allocatable memory of the buffer pool -* Default value: 20% - -The maximum amount of memory available in the BE buffer pool. The buffer pool is a new memory management structure of BE, which manages memory in buffer pages and supports spilling data to disk. The memory for all concurrent queries will be allocated from the buffer pool. The current buffer pool only works on **AggregationNode** and **ExchangeNode**. - -### `check_auto_compaction_interval_seconds` - -* Type: int32 -* Description: Interval, in seconds, at which to check the auto compaction configuration when auto compaction is disabled. -* Default value: 5 - -### `check_consistency_worker_count` - -Default: 1 - -The number of worker threads used to calculate the checksum of a tablet - -### `chunk_reserved_bytes_limit` - -Default: 2147483648 - -The reserved bytes limit of the Chunk Allocator, 2GB by default. Increasing this variable can improve performance, but it will hold more free memory that other modules cannot use. - -### `clear_transaction_task_worker_count` - -Default: 1 - -Number of threads used to clean up transactions - -### `clone_worker_count` - -Default: 3 - -Number of threads used to perform cloning tasks - -### `cluster_id` - -* Type: int32 -* Description: Configure the cluster id to which the BE belongs. -* Default value: -1 - -This value is usually delivered by the FE to the BE through the heartbeat, so it does not need to be configured. It can be configured when it is confirmed that a BE belongs to a certain Doris cluster. The cluster_id file under the data directory also needs to be modified to match this parameter.
- -### `column_dictionary_key_ratio_threshold` - -Default: 0 - -The value ratio of string type, less than this ratio, using dictionary compression algorithm - -### `column_dictionary_key_size_threshold` - -Default: 0 - -Dictionary compression column size, less than this value using dictionary compression algorithm - -### `compaction_tablet_compaction_score_factor` - -* Type: int32 -* Description: Coefficient for compaction score when calculating tablet score to find a tablet for compaction. -* Default value: 1 - -### `compaction_tablet_scan_frequency_factor` - -* Type: int32 -* Description: Coefficient for tablet scan frequency when calculating tablet score to find a tablet for compaction. -* Default value: 0 - -Tablet scan frequency can be taken into consideration when selecting an tablet for compaction and preferentially do compaction for those tablets which are scanned frequently during a latest period of time at the present. -Tablet score can be calculated like this: - -tablet_score = compaction_tablet_scan_frequency_factor * tablet_scan_frequency + compaction_tablet_compaction_score_factor * compaction_score - -### `compaction_task_num_per_disk` - -* Type: int32 -* Description: The number of compaction tasks which execute in parallel for a disk(HDD). -* Default value: 2 - -### `compaction_task_num_per_fast_disk` - -* Type: int32 -* Description: The number of compaction tasks which execute in parallel for a fast disk(SSD). -* Default value: 4 - -### `compress_rowbatches` - -* Type: bool -* Description: enable to use Snappy compression algorithm for data compression when serializing RowBatch -* Default value: true - -### `create_tablet_worker_count` - -Default: 3 - -Number of worker threads for BE to create a tablet - -### `cumulative_compaction_rounds_for_each_base_compaction_round` - -* Type: int32 -* Description: How many rounds of cumulative compaction for each round of base compaction when compaction tasks generation. -* Default value: 9 - -### `disable_auto_compaction` - -* Type: bool -* Description: Whether disable automatic compaction task -* Default value: false - -Generally it needs to be turned off. When you want to manually operate the compaction task in the debugging or test environment, you can turn on the configuration. - -### `cumulative_compaction_budgeted_bytes` - -Default: 104857600 - -One of the trigger conditions of BaseCompaction: Singleton file size limit, 100MB - -### `cumulative_compaction_check_interval_seconds` - -Default: 10 (s) - -CumulativeCompaction thread polling interval - -### `cumulative_compaction_skip_window_seconds` - -Default: 30(s) - -CumulativeCompaction skips the most recently released increments to prevent compacting versions that may be queried (in case the query planning phase takes some time). Change the parameter is to set the skipped window time size - -### cumulative_compaction_trace_threshold - -* Type: int32 -* Description: Threshold to logging cumulative compaction's trace information, in seconds -* Default value: 10 - -Similar to `base_compaction_trace_threshold`. - -### disable_compaction_trace_log - -* Type: bool -* Description: disable the trace log of compaction -* Default value: true - -If set to true, the `cumulative_compaction_trace_threshold` and `base_compaction_trace_threshold` won't work and log is disabled. - -### `cumulative_compaction_policy` - -* Type: string -* Description: Configure the merge policy of the cumulative compaction stage. Currently, two merge policy have been implemented, num_based and size_based. 
-* Default value: size_based - -In detail, ordinary is the initial version of the cumulative compaction merge policy. After a cumulative compaction, the base compaction process is directly performed. The size_based policy is an optimized version of the ordinary strategy. Versions are merged only when the disk volume of the rowset is of the same order of magnitude. After the compaction, the output rowset which satisfies the conditions is promoted to the base compaction stage. In the case of a large number of small batch imports: reduce the write magnification of base compact, trade-off between read magnification and space magnification, and reducing file version data. - -### `cumulative_size_based_promotion_size_mbytes` - -* Type: int64 -* Description: Under the size_based policy, the total disk size of the output rowset of cumulative compaction exceeds this configuration size, and the rowset will be used for base compaction. The unit is m bytes. -* Default value: 1024 - -In general, if the configuration is less than 2G, in order to prevent the cumulative compression time from being too long, resulting in the version backlog. - -### `cumulative_size_based_promotion_ratio` - -* Type: double -* Description: Under the size_based policy, when the total disk size of the cumulative compaction output rowset exceeds the configuration ratio of the base version rowset, the rowset will be used for base compaction. -* Default value: 0.05 - -Generally, it is recommended that the configuration should not be higher than 0.1 and lower than 0.02. - -### `cumulative_size_based_promotion_min_size_mbytes` - -* Type: int64 -* Description: Under the size_based strategy, if the total disk size of the output rowset of the cumulative compaction is lower than this configuration size, the rowset will not undergo base compaction and is still in the cumulative compaction process. The unit is m bytes. -* Default value: 64 - -Generally, the configuration is within 512m. If the configuration is too large, the size of the early base version is too small, and base compaction has not been performed. - -### `cumulative_size_based_compaction_lower_size_mbytes` - -* Type: int64 -* Description: Under the size_based strategy, when the cumulative compaction is merged, the selected rowsets to be merged have a larger disk size than this configuration, then they are divided and merged according to the level policy. When it is smaller than this configuration, merge directly. The unit is m bytes. -* Default value: 64 - -Generally, the configuration is within 128m. Over configuration will cause more cumulative compaction write amplification. - -### `custom_config_dir` - -Configure the location of the `be_custom.conf` file. The default is in the `conf/` directory. - -In some deployment environments, the `conf/` directory may be overwritten due to system upgrades. This will cause the user modified configuration items to be overwritten. At this time, we can store `be_custom.conf` in another specified directory to prevent the configuration file from being overwritten. - -### `default_num_rows_per_column_file_block` - -* Type: int32 -* Description: Configure how many rows of data are contained in a single RowBlock. -* Default value: 1024 - -### `default_rowset_type` - -* Type: string -* Description: Identifies the storage format selected by BE by default. The configurable parameters are: "**ALPHA**", "**BETA**". Mainly play the following two roles -1. 
When the storage_format of the table is set to Default, select the storage format of BE through this configuration. -2. Select the storage format of when BE performing Compaction -* Default value: BETA - -### `delete_worker_count` - -Default: 3 - -Number of threads performing data deletion tasks - -### `disable_mem_pools` - -Default: false - -Whether to disable the memory cache pool, it is not disabled by default - -### `disable_storage_page_cache` - -* Type: bool -* Description: Disable to use page cache for index caching, this configuration only takes effect in BETA storage format, usually it is recommended to false -* Default value: false - -### `disk_stat_monitor_interval` - -Default: 5(s) - -Disk status check interval - -### `doris_cgroups` - -Default: empty - -Cgroups assigned to doris - -### `doris_max_pushdown_conjuncts_return_rate` - -* Type: int32 -* Description: When BE performs HashJoin, it will adopt a dynamic partitioning method to push the join condition to OlapScanner. When the data scanned by OlapScanner is larger than 32768 rows, BE will check the filter condition. If the filter rate of the filter condition is lower than this configuration, Doris will stop using the dynamic partition clipping condition for data filtering. -* Default value: 90 - -### `doris_max_scan_key_num` - -* Type: int -* Description: Used to limit the maximum number of scan keys that a scan node can split in a query request. When a conditional query request reaches the scan node, the scan node will try to split the conditions related to the key column in the query condition into multiple scan key ranges. After that, these scan key ranges will be assigned to multiple scanner threads for data scanning. A larger value usually means that more scanner threads can be used to increase the parallelism of the scanning operation. However, in high concurrency scenarios, too many threads may bring greater scheduling overhead and system load, and will slow down the query response speed. An empirical value is 50. This configuration can be configured separately at the session level. For details, please refer to the description of `max_scan_key_num` in [Variables](../variables.md). -* Default value: 1024 - -When the concurrency cannot be improved in high concurrency scenarios, try to reduce this value and observe the impact. - -### `doris_scan_range_row_count` - -* Type: int32 -* Description: When BE performs data scanning, it will split the same scanning range into multiple ScanRanges. This parameter represents the scan data range of each ScanRange. This parameter can limit the time that a single OlapScanner occupies the io thread. -* Default value: 524288 - -### `doris_scanner_queue_size` - -* Type: int32 -* Description: The length of the RowBatch buffer queue between TransferThread and OlapScanner. When Doris performs data scanning, it is performed asynchronously. The Rowbatch scanned by OlapScanner will be placed in the scanner buffer queue, waiting for the upper TransferThread to take it away. -* Default value: 1024 - -### `doris_scanner_row_num` - -Default: 16384 - -The maximum number of data rows returned by each scanning thread in a single execution - -### `doris_scanner_thread_pool_queue_size` - -* Type: int32 -* Description: The queue length of the Scanner thread pool. 
In Doris' scanning tasks, each Scanner will be submitted as a thread task to the thread pool waiting to be scheduled, and after the number of submitted tasks exceeds the length of the thread pool queue, subsequent submitted tasks will be blocked until there is a empty slot in the queue. -* Default value: 102400 - -### `doris_scanner_thread_pool_thread_num` - -* Type: int32 -* Description: The number of threads in the Scanner thread pool. In Doris' scanning tasks, each Scanner will be submitted as a thread task to the thread pool to be scheduled. This parameter determines the size of the Scanner thread pool. -* Default value: 48 - -### `download_low_speed_limit_kbps` - -Default: 50 (KB/s) - -Minimum download speed - -### `download_low_speed_time` - -Default: 300(s) - -Download time limit, 300 seconds by default - -### `download_worker_count` - -Default: 1 - -The number of download threads, the default is 1 - -### `drop_tablet_worker_count` - -Default: 3 - -Number of threads to delete tablet - -### `enable_metric_calculator` - -Default: true - -If set to true, the metric calculator will run to collect BE-related indicator information, if set to false, it will not run - -### `enable_partitioned_aggregation` - -* Type: bool -* Description: Whether the BE node implements the aggregation operation by PartitionAggregateNode, if false, AggregateNode will be executed to complete the aggregation. It is not recommended to set it to false in non-special demand scenarios. -* Default value: true - -### `enable_prefetch` -* Type: bool -* Description: When using PartitionedHashTable for aggregation and join calculations, whether to perform HashBuket prefetch. Recommended to be set to true -* Default value: true - -### `enable_quadratic_probing` - -* Type: bool -* Description: When a Hash conflict occurs when using PartitionedHashTable, enable to use the square detection method to resolve the Hash conflict. If the value is false, linear detection is used to resolve the Hash conflict. For the square detection method, please refer to: [quadratic_probing](https://en.wikipedia.org/wiki/Quadratic_probing) -* Default value: true - -### `enable_system_metrics` - -Default: true - -User control to turn on and off system indicators. - -### `enable_token_check` - -Default: true - -Used for forward compatibility, will be removed later. - -### `es_http_timeout_ms` - -Default: 5000 (ms) - -The timeout period for connecting to ES via http, the default is 5 seconds. - -### `es_scroll_keepalive` - -Default: 5m - -es scroll Keeplive hold time, the default is 5 minutes - -### `etl_thread_pool_queue_size` - -Default: 256 - -The size of the ETL thread pool - -### `etl_thread_pool_size` - -### `exchg_node_buffer_size_bytes` - -* Type: int32 -* Description: The size of the Buffer queue of the ExchangeNode node, in bytes. After the amount of data sent from the Sender side is larger than the Buffer size of ExchangeNode, subsequent data sent will block until the Buffer frees up space for writing. -* Default value: 10485760 - -### `file_descriptor_cache_capacity` - -Default: 32768 - -File handle cache capacity, 32768 file handles are cached by default. - -### `cache_clean_interval` - -Default: 1800(s) - -File handle cache cleaning interval, used to clean up file handles that have not been used for a long time. -Also the clean interval of Segment Cache. 
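For reference, the scan-related settings above can be expressed as a short `be.conf` sketch. The values below simply restate the defaults documented in this section; whether they need tuning depends on the workload, so treat this as illustrative rather than a recommendation.

```
# Scanner thread pool: each Scanner is submitted to this pool as a task
doris_scanner_thread_pool_thread_num = 48
doris_scanner_thread_pool_queue_size = 102400

# Maximum rows returned by one scanner thread in a single execution
doris_scanner_row_num = 16384

# RowBatch buffer queue length between TransferThread and OlapScanner
doris_scanner_queue_size = 1024
```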
- -### `flush_thread_num_per_store` - -Default: 2 - -The number of threads used to refresh the memory table per store - -### `force_recovery` - -### `fragment_pool_queue_size` - -Default: 2048 - -The upper limit of query requests that can be processed on a single node - -### `fragment_pool_thread_num_min` - -Default: 64 - -### `fragment_pool_thread_num_max` - -Default: 256 - -The above two parameters are to set the number of query threads. By default, a minimum of 64 threads will be started, subsequent query requests will dynamically create threads, and a maximum of 256 threads will be created. - -### `heartbeat_service_port` -* Type: int32 -* Description: Heartbeat service port (thrift) on BE, used to receive heartbeat from FE -* Default value: 9050 - -### `heartbeat_service_thread_count` - -* Type: int32 -* Description: The number of threads that execute the heartbeat service on BE. the default is 1, it is not recommended to modify -* Default value: 1 - -### `ignore_broken_disk` - -Default: false - -When BE start, If there is a broken disk, BE process will exit by default.Otherwise, we will ignore the broken disk - -### `ignore_load_tablet_failure` -When BE starts, it will check all the paths under the storage_root_path in configuration. - -`ignore_broken_disk=true` - -If the path does not exist or the file under the path cannot be read or written (broken disk), it will be ignored. If there are any other available paths, the startup will not be interrupted. - -`ignore_broken_disk=false` - -If the path does not exist or the file under the path cannot be read or written (bad disk), the startup will fail and exit. - -The default value is false. -``` -load tablets from header failed, failed tablets size: xxx, path=xxx -``` - -Indicates how many tablets in the data directory failed to load. At the same time, the log will also contain specific information about the tablet that failed to load. At this time, manual intervention is required to troubleshoot the cause of the error. After investigation, there are usually two ways to recover: - -1. The tablet information cannot be repaired. If the other copies are normal, you can delete the wrong tablet with the `meta_tool` tool. -2. Set `ignore_load_tablet_failure` to true, BE will ignore these faulty tablets and start normally - -### ignore_rowset_stale_unconsistent_delete - -* Type: boolean -* Description:It is used to decide whether to delete the outdated merged rowset if it cannot form a consistent version path. -* Default: false - -The merged expired rowset version path will be deleted after half an hour. In abnormal situations, deleting these versions will result in the problem that the consistent path of the query cannot be constructed. When the configuration is false, the program check is strict and the program will directly report an error and exit. -When configured as true, the program will run normally and ignore this error. In general, ignoring this error will not affect the query, only when the merged version is dispatched by fe, -230 error will appear. 
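To make the relationship between the two startup checks above concrete, here is a hedged `be.conf` sketch. Both values shown are the documented defaults; setting either to true is only advisable after the troubleshooting steps described above.

```
# Ignore storage_root_path entries that are missing or unreadable at startup
# instead of exiting (default: false)
ignore_broken_disk = false

# Start BE even if some tablets fail to load from the data directory
# (default: false); only enable after investigating the
# "load tablets from header failed" log
ignore_load_tablet_failure = false
```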
- -### inc_rowset_expired_sec - -Default: 1800 (s) - -Import activated data, storage engine retention time, used for incremental cloning - -### `index_stream_cache_capacity` - -Default: 10737418240 - -BloomFilter/Min/Max and other statistical information cache capacity - -### `kafka_broker_version_fallback` - -Default: 0.10.0 - -If the dependent Kafka version is lower than the Kafka client version that routine load depends on, the value set by the fallback version kafka_broker_version_fallback will be used, and the valid values are: 0.9.0, 0.8.2, 0.8.1, 0.8.0. - -### `load_data_reserve_hours` - -Default: 4(hour) - -Used for mini load. The mini load data file will be deleted after this time - -### `load_error_log_reserve_hours` - -Default: 48 (hour) - -The load error log will be deleted after this time - -### `load_process_max_memory_limit_bytes` - -Default: 107374182400 - -The upper limit of memory occupied by all imported threads on a single node, default value: 100G - -Set these default values very large, because we don't want to affect load performance when users upgrade Doris. If necessary, the user should set these configurations correctly. - -### `load_process_max_memory_limit_percent` - -Default: 80 (%) - -The percentage of the upper memory limit occupied by all imported threads on a single node, the default is 80% - -Set these default values very large, because we don't want to affect load performance when users upgrade Doris. If necessary, the user should set these configurations correctly - -### `log_buffer_level` - -Default: empty - -The log flushing strategy is kept in memory by default - -### `madvise_huge_pages` - -Default: false - -Whether to use linux memory huge pages, not enabled by default - -### `make_snapshot_worker_count` - -Default: 5 - -Number of threads making snapshots - -### `max_client_cache_size_per_host` - -Default: 10 - -The maximum number of client caches per host. There are multiple client caches in BE, but currently we use the same cache size configuration. If necessary, use different configurations to set up different client-side caches - -### `max_compaction_threads` - -* Type: int32 -* Description: The maximum of thread number in compaction thread pool. -* Default value: 10 - -### `max_consumer_num_per_group` - -Default: 3 - -The maximum number of consumers in a data consumer group, used for routine load - -### `min_cumulative_compaction_num_singleton_deltas` - -Default: 5 - -Cumulative compaction strategy: the minimum number of incremental files - -### `max_cumulative_compaction_num_singleton_deltas` - -Default: 1000 - -Cumulative compaction strategy: the maximum number of incremental files - -### `max_download_speed_kbps` - -Default: 50000 (KB/s) - -Maximum download speed limit - -### `max_free_io_buffers` - -Default: 128 - -For each io buffer size, the maximum number of buffers that IoMgr will reserve ranges from 1024B to 8MB buffers, up to about 2GB buffers. - -### `max_garbage_sweep_interval` - -Default: 3600 - -The maximum interval for disk garbage cleaning, the default is one hour - -### `max_memory_sink_batch_count` - -Default: 20 - -The maximum external scan cache batch count, which means that the cache max_memory_cache_batch_count * batch_size row, the default is 20, and the default value of batch_size is 1024, which means that 20 * 1024 rows will be cached - -### `max_percentage_of_error_disk` - -* Type: int32 -* Description: The storage engine allows the percentage of damaged hard disks to exist. 
After the damaged hard disk exceeds the changed ratio, BE will automatically exit. -* Default value: 0 - -### `max_pushdown_conditions_per_column` - -* Type: int -* Description: Used to limit the maximum number of conditions that can be pushed down to the storage engine for a single column in a query request. During the execution of the query plan, the filter conditions on some columns can be pushed down to the storage engine, so that the index information in the storage engine can be used for data filtering, reducing the amount of data that needs to be scanned by the query. Such as equivalent conditions, conditions in IN predicates, etc. In most cases, this parameter only affects queries containing IN predicates. Such as `WHERE colA IN (1,2,3,4, ...)`. A larger number means that more conditions in the IN predicate can be pushed to the storage engine, but too many conditions may cause an increase in random reads, and in some cases may reduce query efficiency. This configuration can be individually configured for session level. For details, please refer to the description of `max_pushdown_conditions_per_column` in [Variables](../ variables.md). -* Default value: 1024 - -* Example - - The table structure is `id INT, col2 INT, col3 varchar (32), ...`. - - The query is `... WHERE id IN (v1, v2, v3, ...)` - - If the number of conditions in the IN predicate exceeds the configuration, try to increase the configuration value and observe whether the query response has improved. - -### `max_runnings_transactions_per_txn_map` - -Default: 100 - -Max number of txns for every txn_partition_map in txn manager, this is a self protection to avoid too many txns saving in manager - -### `max_send_batch_parallelism_per_job` - -* Type: int -* Description: Max send batch parallelism for OlapTableSink. The value set by the user for `send_batch_parallelism` is not allowed to exceed `max_send_batch_parallelism_per_job`, if exceed, the value of `send_batch_parallelism` would be `max_send_batch_parallelism_per_job`. -* Default value: 5 - -### `max_tablet_num_per_shard` - -Default: 1024 - -The number of sliced tablets, plan the layout of the tablet, and avoid too many tablet subdirectories in the repeated directory - -### `max_tablet_version_num` - -* Type: int -* Description: Limit the number of versions of a single tablet. It is used to prevent a large number of version accumulation problems caused by too frequent import or untimely compaction. When the limit is exceeded, the import task will be rejected. -* Default value: 500 - -### `mem_limit` - -* Type: string -* Description: Limit the percentage of the server's maximum memory used by the BE process. It is used to prevent BE memory from occupying to many the machine's memory. This parameter must be greater than 0. When the percentage is greater than 100%, the value will default to 100%. -* Default value: 80% - -### `memory_limitation_per_thread_for_schema_change` - -Default: 2 (G) - -Maximum memory allowed for a single schema change task - -### `memory_maintenance_sleep_time_s` - -Default: 10 - -Sleep time (in seconds) between memory maintenance iterations - -### `memory_max_alignment` - -Default: 16 - -Maximum alignment memory - -### `read_size` - -Default: 8388608 - -The read size is the read size sent to the os. There is a trade-off between latency and the whole process, getting to keep the disk busy but not introducing seeks. 
For 8 MB reads, random I/O and sequential I/O have similar performance - -### `min_buffer_size` - -Default: 1024 - -Minimum read buffer size, in bytes - -### `min_compaction_failure_interval_sec` - -* Type: int32 -* Description: During the cumulative compaction process, when the selected tablet fails to be merged successfully, it will wait for a period of time before it may be selected again. The waiting period is the value of this configuration. -* Default value: 5 -* Unit: seconds - -### `min_compaction_threads` - -* Type: int32 -* Description: The minimum number of threads in the compaction thread pool. -* Default value: 10 - -### `min_file_descriptor_number` - -Default: 60000 - -The minimum file handle limit required by the BE process - -### `min_garbage_sweep_interval` - -Default: 180 - -The minimum interval between disk garbage collection runs, in seconds - -### `mmap_buffers` - -Default: false - -Whether to use mmap to allocate memory; not used by default - -### `num_cores` - -* Type: int32 -* Description: The number of CPU cores that BE can use. When the value is 0, BE will obtain the number of CPU cores of the machine from /proc/cpuinfo. -* Default value: 0 - -### `num_disks` - -Default: 0 - -Controls the number of disks on the machine. If it is 0, the value is taken from the system settings - -### `num_threads_per_core` - -Default: 3 - -Controls the number of threads that each core runs. Usually choose 2 or 3 times the number of cores. This keeps the cores busy without causing excessive jitter - -### `num_threads_per_disk` - -Default: 0 - -The maximum number of threads per disk, which is also the maximum queue depth of each disk - -### `number_tablet_writer_threads` - -Default: 16 - -Number of tablet write threads - -### `path_gc_check` - -Default: true - -Whether to enable the recycled-data scan thread check; enabled by default - -### `path_gc_check_interval_second` - -Default: 86400 - -Recycled-data scan thread check interval, in seconds - -### `path_gc_check_step` - -Default: 1000 - -### `path_gc_check_step_interval_ms` - -Default: 10 (ms) - -### `path_scan_interval_second` - -Default: 86400 - -### `pending_data_expire_time_sec` - -Default: 1800 - -The maximum duration for which unvalidated (pending) data is retained by the storage engine, in seconds - -### `periodic_counter_update_period_ms` - -Default: 500 - -Update period of rate counters and sampling counters, in milliseconds - -### `plugin_path` - -Default: ${DORIS_HOME}/plugin - -Plugin directory - -### `port` - -* Type: int32 -* Description: The port used in unit tests. It is meaningless in a production environment and can be ignored. -* Default value: 20001 - -### `pprof_profile_dir` - -Default: ${DORIS_HOME}/log - -Directory where pprof profiles are saved - -### `priority_networks` - -Default: empty - -Declares a selection strategy for servers that have multiple IP addresses. Note that at most one IP address should match this list. This is a semicolon-separated list in CIDR notation, such as 10.10.10.0/24. 
If there is no IP matching this rule, one will be randomly selected - -### `priority_queue_remaining_tasks_increased_frequency` - -Default: 512 - - the increased frequency of priority for remaining tasks in BlockingPriorityQueue - -### `publish_version_worker_count` - -Default: 8 - -the count of thread to publish version - -### `pull_load_task_dir` - -Default: ${DORIS_HOME}/var/pull_load - -Pull the directory of the laod task - -### `push_worker_count_high_priority` - -Default: 3 - -Import the number of threads for processing HIGH priority tasks - -### `push_worker_count_normal_priority` - -Default: 3 - -Import the number of threads for processing NORMAL priority tasks - -### `push_write_mbytes_per_sec` - -+ Type: int32 -+ Description: Load data speed control, the default is 10MB per second. Applicable to all load methods. -+ Unit: MB -+ Default value: 10 - -### `query_scratch_dirs` - -+ Type: string -+ Description: The directory selected by BE to store temporary data during spill to disk. which is similar to the storage path configuration, multiple directories are separated by ;. -+ Default value: ${DORIS_HOME} - -### `release_snapshot_worker_count` - -Default: 5 - -Number of threads releasing snapshots - -### `report_disk_state_interval_seconds` - -Default: 60 - -The interval time for the agent to report the disk status to FE, unit (seconds) - -### `report_tablet_interval_seconds` - -Default: 60 - -The interval time for the agent to report the olap table to the FE, in seconds - -### `report_task_interval_seconds` - -Default: 10 - -The interval time for the agent to report the task signature to FE, unit (seconds) - -### `result_buffer_cancelled_interval_time` - -Default: 300 - -Result buffer cancellation time (unit: second) - -### `routine_load_thread_pool_size` - -Default: 10 - -The thread pool size of the routine load task. This should be greater than the FE configuration'max_concurrent_task_num_per_be' (default 5) - -### `row_nums_check` - -Default: true - -Check row nums for BE/CE and schema change. true is open, false is closed - -### `row_step_for_compaction_merge_log` - -* Type: int64 -* Description: Merge log will be printed for each "row_step_for_compaction_merge_log" rows merged during compaction. If the value is set to 0, merge log will not be printed. -* Default value: 0 -* Dynamically modify: true - -### `scan_context_gc_interval_min` - -Default: 5 - -This configuration is used for the context gc thread scheduling cycle. Note: The unit is minutes, and the default is 5 minutes - -### `send_batch_thread_pool_thread_num` - -* Type: int32 -* Description: The number of threads in the SendBatch thread pool. In NodeChannels' sending data tasks, the SendBatch operation of each NodeChannel will be submitted as a thread task to the thread pool to be scheduled. This parameter determines the size of the SendBatch thread pool. -* Default value: 256 - -### `send_batch_thread_pool_queue_size` - -* Type: int32 -* Description: The queue length of the SendBatch thread pool. In NodeChannels' sending data tasks, the SendBatch operation of each NodeChannel will be submitted as a thread task to the thread pool waiting to be scheduled, and after the number of submitted tasks exceeds the length of the thread pool queue, subsequent submitted tasks will be blocked until there is a empty slot in the queue. 
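The thread-pool settings above interact with other configs mentioned in this document; a small `be.conf` sketch follows, using only the documented default values and restating the FE-side constraint on `routine_load_thread_pool_size` as a comment.

```
# Routine load task thread pool on BE; should be greater than the FE config
# max_concurrent_task_num_per_be (default 5)
routine_load_thread_pool_size = 10

# Thread pool used by NodeChannel SendBatch tasks during load
send_batch_thread_pool_thread_num = 256
```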
- -### `sleep_one_second` - -+ Type: int32 -+ Description: A global variable used by BE threads to sleep for 1 second; it should not be modified -+ Default value: 1 - -### `small_file_dir` - -Default: ${DORIS_HOME}/lib/small_file/ - -Directory for saving files downloaded by SmallFileMgr - -### `snapshot_expire_time_sec` - -Default: 172800 - -Snapshot file cleaning interval, default value: 48 hours - -### `status_report_interval` - -Default: 5 - -Interval between profile reports; unit: seconds - -### `storage_flood_stage_left_capacity_bytes` - -Default: 1073741824 - -The minimum number of bytes that should be left free in a data directory; default value: 1GB - -### `storage_flood_stage_usage_percent` - -Default: 95 (95%) - -Together, the storage_flood_stage_usage_percent and storage_flood_stage_left_capacity_bytes configurations limit the maximum usage of a data directory's capacity. - -### `storage_medium_migrate_count` - -Default: 1 - -The number of threads used to migrate (clone) tablets between storage media - -### `storage_page_cache_limit` - -Default: 20% - -The size limit of the storage page cache - -### `index_page_cache_percentage` -* Type: int32 -* Description: Index page cache as a percentage of the total storage page cache, value range is [0, 100] -* Default value: 10 - -### `storage_root_path` - -* Type: string - -* Description: Data root directories, separated by ';'. You can specify the storage medium of each root path, HDD or SSD, and you can append a capacity limit at the end of each root path, separated by ','. - - eg.1: `storage_root_path=/home/disk1/doris.HDD,50;/home/disk2/doris.SSD,1;/home/disk2/doris` - - * 1. /home/disk1/doris.HDD,50: the capacity limit is 50GB, medium is HDD; - * 2. /home/disk2/doris.SSD,1: the capacity limit is 1GB, medium is SSD; - * 3. /home/disk2/doris: the capacity limit is the disk capacity, medium is HDD (default) - - eg.2: `storage_root_path=/home/disk1/doris,medium:hdd,capacity:50;/home/disk2/doris,medium:ssd,capacity:50` - - * 1. /home/disk1/doris,medium:hdd,capacity:50: the capacity limit is 50GB, medium is HDD; - * 2. /home/disk2/doris,medium:ssd,capacity:50: the capacity limit is 50GB, medium is SSD; - -* Default: ${DORIS_HOME} - -### `storage_strict_check_incompatible_old_format` -* Type: bool -* Description: Used to check the incompatible old format strictly -* Default value: true -* Dynamically modify: false - -This config controls whether Doris checks the incompatible old-format `hdr_` metadata in a strict way. When the config is true, the process will log a fatal error and exit. When the config is false, the process will only log a warning. - -### `streaming_load_max_mb` - -* Type: int64 -* Description: Used to limit the maximum amount of csv data allowed in one Stream load. The unit is MB. -* Default value: 10240 -* Dynamically modify: yes - -Stream Load is generally suitable for loading data of less than a few GB; it is not suitable for loading very large data. - -### `streaming_load_json_max_mb` - -* Type: int64 -* Description: Used to limit the maximum amount of json data allowed in one Stream load. The unit is MB. -* Default value: 100 -* Dynamically modify: yes - -Some data formats, such as JSON, cannot be split. Doris must read all the data into memory before parsing can begin. Therefore, this value is used to limit the maximum amount of data that can be loaded in a single Stream load. - -### `streaming_load_rpc_max_alive_time_sec` - -Default: 1200 - -The lifetime of TabletsChannel. 
If the channel does not receive any data within this period, the channel will be deleted. Unit: second - -### `sync_tablet_meta` - -Default: false - -Whether the storage engine enables sync to persist data to disk - -### `sys_log_dir` - -* Type: string -* Description: Storage directory of BE log data -* Default: ${DORIS_HOME}/log - -### `sys_log_level` - -Default: INFO - -Log level: INFO < WARNING < ERROR < FATAL - -### `sys_log_roll_mode` - -Default: SIZE-MB-1024 - -The log rolling size; a new log file is created for every 1GB of logs - -### `sys_log_roll_num` - -Default: 10 - -Number of log files kept - -### `sys_log_verbose_level` - -Default: 10 - -Log verbosity level, used to control the output of VLOG statements in the code - -### `sys_log_verbose_modules` - -Default: empty - -Log printing modules; for example, setting this to olap will only print the verbose logs of the olap module - -### `tablet_map_shard_size` - -Default: 1 - -tablet_map_lock shard size; the value must be a power of two (2^n). This is for better tablet management - -### `tablet_meta_checkpoint_min_interval_secs` - -Default: 600(s) - -The polling interval of the TabletMeta Checkpoint thread - -### `tablet_meta_checkpoint_min_new_rowsets_num` - -Default: 10 - -The minimum number of new Rowsets required to trigger a TabletMeta Checkpoint - -### `tablet_scan_frequency_time_node_interval_second` - -* Type: int64 -* Description: Time interval, in seconds, at which the metric 'query_scan_count' and a timestamp are recorded, for the purpose of calculating tablet scan frequency during the latest period of time. -* Default: 300 - -### `tablet_stat_cache_update_interval_second` - -Default: 10 - -Update interval of the tablet stat cache, unit: second - -### `tablet_rowset_stale_sweep_time_sec` - -* Type: int64 -* Description: It is used to control the expiration time for cleaning up merged rowset versions. When the current time now() minus the max created rowset's create time in a version path is greater than tablet_rowset_stale_sweep_time_sec, the current path is cleaned up and these merged rowsets are deleted. The unit is second. -* Default: 1800 - -When writes are too frequent and disk space is insufficient, you can configure a smaller tablet_rowset_stale_sweep_time_sec. However, if this time is less than 5 minutes, it may cause FE to query versions that have already been merged, causing a -230 query error. - -### `tablet_writer_open_rpc_timeout_sec` - -Default: 300 - -Timeout of the open-tablet-writer RPC during load, unit: second - -### `tablet_writer_ignore_eovercrowded` - -* Type: bool -* Description: Used to ignore the brpc error '[E1011]The server is overcrowded' when writing data. -* Default value: false - -When you encounter the '[E1011]The server is overcrowded' error, you can tune the configuration `brpc_socket_max_unwritten_bytes`, but it cannot be modified at runtime. Set this option to `true` to temporarily avoid write failures. Note that it only affects `write`; other RPC requests will still check whether the server is overcrowded. - -### `tc_free_memory_rate` - -Default: 20 (%) - -Available memory, value range: [0-100] - -### `tc_max_total_thread_cache_bytes` - -* Type: int64 -* Description: Used to limit the total thread cache size in tcmalloc. This limit is not a hard limit, so the actual thread cache usage may exceed this limit. 
For details, please refer to [TCMALLOC\_MAX\_TOTAL\_THREAD\_CACHE\_BYTES](https://gperftools.github.io/gperftools/tcmalloc.html) -* Default: 1073741824 - -If the system is found to be in a high-stress scenario and a large number of threads are found in the tcmalloc lock competition phase through the BE thread stack, such as a large number of `SpinLock` related stacks, you can try increasing this parameter to improve system performance. [Reference](https://github.com/gperftools/gperftools/issues/1111) - -### `tc_use_memory_min` - -Default: 10737418240 - -The minimum memory of TCmalloc, when the memory used is less than this, it is not returned to the operating system - -### `thrift_client_retry_interval_ms` - -* Type: int64 -* Description: Used to set retry interval for thrift client in be to avoid avalanche disaster in fe thrift server, the unit is ms. -* Default: 1000 - -### `thrift_connect_timeout_seconds` - -Default: 3 - -The default thrift client connection timeout time (unit: seconds) - -### `thrift_rpc_timeout_ms` - -Default: 5000 - -thrift default timeout time, default: 5 seconds - -### `thrift_server_type_of_fe` - -This configuration indicates the service model used by FE's Thrift service. The type is string and is case-insensitive. This parameter needs to be consistent with the setting of fe's thrift_server_type parameter. Currently there are two values for this parameter, `THREADED` and `THREAD_POOL`. - -If the parameter is `THREADED`, the model is a non-blocking I/O model, - -If the parameter is `THREAD_POOL`, the model is a blocking I/O model. - -### `total_permits_for_compaction_score` - -* Type: int64 -* Description: The upper limit of "permits" held by all compaction tasks. This config can be set to limit memory consumption for compaction. -* Default: 10000 -* Dynamically modify: true - -### `trash_file_expire_time_sec` - -Default: 259200 - -The interval for cleaning the recycle bin is 72 hours. When the disk space is insufficient, the file retention period under trash may not comply with this parameter - -### `txn_commit_rpc_timeout_ms` - -Default: 10000 - -txn submit rpc timeout, the default is 10 seconds - -### `txn_map_shard_size` - -Default: 128 - -txn_map_lock fragment size, the value is 2^n, n=0,1,2,3,4. This is an enhancement to improve the performance of managing txn - -### `txn_shard_size` - -Default: 1024 - -txn_lock shard size, the value is 2^n, n=0,1,2,3,4, this is an enhancement function that can improve the performance of submitting and publishing txn - -### `unused_rowset_monitor_interval` - -Default: 30 - -Time interval for clearing expired Rowset, unit: second - -### `upload_worker_count` - -Default: 1 - -Maximum number of threads for uploading files - -### `use_mmap_allocate_chunk` - -Default: false - -Whether to use mmap to allocate blocks. If you enable this feature, it is best to increase the value of vm.max_map_count, its default value is 65530. You can use "sysctl -w vm.max_map_count=262144" or "echo 262144> /proc/sys/vm/" to operate max_map_count as root. 
When this setting is true, you must set chunk_reserved_bytes_limit to a relatively low Big number, otherwise the performance is very very bad - -### `user_function_dir` - -${DORIS_HOME}/lib/udf - -udf function directory - -### `webserver_num_workers` - -Default: 48 - -Webserver default number of worker threads - -### `webserver_port` - -* Type: int32 -* Description: Service port of http server on BE -* Default: 8040 - -### `write_buffer_size` - -Default: 104857600 - -The size of the buffer before flashing - -### `zone_map_row_num_threshold` - -* Type: int32 -* Description: If the number of rows in a page is less than this value, no zonemap will be created to reduce data expansion -* Default: 20 - -### `aws_log_level` - -* Type: int32 - -* Description: log level of AWS SDK, - ``` - Off = 0, - Fatal = 1, - Error = 2, - Warn = 3, - Info = 4, - Debug = 5, - Trace = 6 - ``` - -* Default: 3 - -### `track_new_delete` - -* Type: bool -* Description: Whether Hook TCmalloc new/delete, currently consume/release tls mem tracker in Hook. -* Default: true - -### `mem_tracker_level` - -* Type: int16 -* Description: The level at which MemTracker is displayed on the Web page equal or lower than this level will be displayed on the Web page - ``` - OVERVIEW = 0 - TASK = 1 - INSTANCE = 2 - VERBOSE = 3 - ``` -* Default: 0 - -### `mem_tracker_consume_min_size_bytes` - -* Type: int32 -* Description: The minimum length of TCMalloc Hook when consume/release MemTracker. Consume size smaller than this value will continue to accumulate to avoid frequent calls to consume/release of MemTracker. Decreasing this value will increase the frequency of consume/release. Increasing this value will cause MemTracker statistics to be inaccurate. Theoretically, the statistical value of a MemTracker differs from the true value = ( mem_tracker_consume_min_size_bytes * the number of BE threads where the MemTracker is located). -* Default: 1048576 - -### `memory_leak_detection` - -* Type: bool -* Description: Whether to start memory leak detection, when MemTracker is a negative value, it is considered that a memory leak has occurred, but the actual MemTracker records inaccurately will also cause a negative value, so this feature is in the experimental stage. -* Default: false - -### `max_segment_num_per_rowset` - -* Type: int32 -* Description: Used to limit the number of segments in the newly generated rowset when importing. If the threshold is exceeded, the import will fail with error -238. Too many segments will cause compaction to take up a lot of memory and cause OOM errors. -* Default value: 200 - -### `remote_storage_read_buffer_mb` - -* Type: int32 -* Description: The cache size used when reading files on hdfs or object storage. -* Default value: 16MB - -Increasing this value can reduce the number of calls to read remote data, but it will increase memory overhead. - -### `external_table_connect_timeout_sec` - -* Type: int32 -* Description: The timeout when establishing connection with external table such as ODBC table. -* Default value: 5 seconds - -### `segment_cache_capacity` - -* Type: int32 -* Description: The maximum number of Segments cached by Segment Cache. -* Default value: 1000000 - -The default value is currently only an empirical value, and may need to be modified according to actual scenarios. Increasing this value can cache more segments and avoid some IO. Decreasing this value will reduce memory usage. 
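As a worked instance of the deviation bound stated above for `mem_tracker_consume_min_size_bytes`, the comment below multiplies the documented default by a purely hypothetical count of 300 BE threads; the real thread count varies by deployment.

```
# Default: 1 MB accumulates per thread before consume/release is reported
mem_tracker_consume_min_size_bytes = 1048576

# Worst-case drift of one MemTracker's statistics from the true value
# (assuming a hypothetical 300 BE threads):
#   1048576 bytes * 300 threads = 314572800 bytes, roughly 300 MB
```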
- -### `auto_refresh_brpc_channel` - -* Type: bool -* Description: When obtaining a brpc connection, judge the availability of the connection through a hand_shake rpc, and re-establish the connection if it is not available. -* Default value: false - -### `high_priority_flush_thread_num_per_store` - -* Type: int32 -* Description: The number of flush threads per store path allocated for high priority import tasks. -* Default value: 1 - -### `routine_load_consumer_pool_size` - -* Type: int32 -* Description: The number of cached data consumers used by routine load. -* Default: 10 - -### `load_task_high_priority_threshold_second` - -* Type: int32 -* Description: When the timeout of an import task is less than this threshold, Doris will consider it to be a high priority task. High priority tasks use a separate pool of flush threads. -* Default: 120 - -### `min_load_rpc_timeout_ms` - -* Type: int32 -* Description: The minimum timeout for each rpc in the load job. -* Default: 20 - -### `doris_scan_range_max_mb` -* Type: int32 -* Description: The maximum amount of data read by each OlapScanner. -* Default: 1024 - -### `string_type_length_soft_limit_bytes` -* Type: int32 -* Description: A soft limit on the length of the string type. -* Default value: 1048576 diff --git a/docs/en/administrator-guide/config/fe_config.md b/docs/en/administrator-guide/config/fe_config.md deleted file mode 100644 index f34658653c..0000000000 --- a/docs/en/administrator-guide/config/fe_config.md +++ /dev/null @@ -1,2210 +0,0 @@ ---- -{ - "title": "FE Configuration", - "language": "en" -} ---- - - - - - -# FE Configuration - -This document mainly introduces the relevant configuration items of FE. - -The FE configuration file `fe.conf` is usually stored in the `conf/` directory of the FE deployment path. In version 0.14, another configuration file, `fe_custom.conf`, is introduced. This configuration file is used to record the configuration items that are dynamically configured and persisted by the user at runtime. - -After the FE process is started, it will read the configuration items in `fe.conf` first, and then read the configuration items in `fe_custom.conf`. The configuration items in `fe_custom.conf` will overwrite the same configuration items in `fe.conf`. - -The location of the `fe_custom.conf` file can be configured in `fe.conf` through the `custom_config_dir` configuration item. - -## View configuration items - -There are two ways to view the configuration items of FE: - -1. FE web page - - Open the FE web page `http://fe_host:fe_http_port/variable` in the browser. You can see the currently effective FE configuration items in `Configure Info`. - -2. View by command - - After the FE is started, you can view the configuration items of the FE in the MySQL client with the following command: - - `ADMIN SHOW FRONTEND CONFIG;` - - The meanings of the columns in the results are as follows: - - * Key: the name of the configuration item. - * Value: the value of the current configuration item. - * Type: the configuration item value type, such as integer or string. - * IsMutable: whether it can be dynamically configured. If true, the configuration item can be dynamically configured at runtime. If false, it means that the configuration item can only be configured in `fe.conf` and takes effect after restarting FE. - * MasterOnly: whether it is a configuration item unique to the Master FE node. 
If it is true, it means that the configuration item is meaningful only at the Master FE node, and is meaningless to other types of FE nodes. If false, it means that the configuration item is meaningful in all types of FE nodes. - * Comment: The description of the configuration item. - -## Set configuration items - -There are two ways to configure FE configuration items: - -1. Static configuration - - Add and set configuration items in the `conf/fe.conf` file. The configuration items in `fe.conf` will be read when the FE process starts. Configuration items not in `fe.conf` will use default values. - -2. Dynamic configuration via MySQL protocol - - After the FE starts, you can set the configuration items dynamically through the following commands. This command requires administrator privilege. - - `ADMIN SET FRONTEND CONFIG (" fe_config_name "=" fe_config_value ");` - - Not all configuration items support dynamic configuration. You can check whether the dynamic configuration is supported by the `IsMutable` column in the` ADMIN SHOW FRONTEND CONFIG; `command result. - - If the configuration item of `MasterOnly` is modified, the command will be directly forwarded to the Master FE and only the corresponding configuration item in the Master FE will be modified. - - **Configuration items modified in this way will become invalid after the FE process restarts.** - - For more help on this command, you can view it through the `HELP ADMIN SET CONFIG;` command. - -3. Dynamic configuration via HTTP protocol - - For details, please refer to [Set Config Action](../http-actions/fe/set-config-action.md) - - This method can also persist the modified configuration items. The configuration items will be persisted in the `fe_custom.conf` file and will still take effect after FE is restarted. - -## Examples - -1. Modify `async_pending_load_task_pool_size` - - Through `ADMIN SHOW FRONTEND CONFIG;` you can see that this configuration item cannot be dynamically configured (`IsMutable` is false). You need to add in `fe.conf`: - - `async_pending_load_task_pool_size = 20` - - Then restart the FE process to take effect the configuration. - -2. Modify `dynamic_partition_enable` - - Through `ADMIN SHOW FRONTEND CONFIG;` you can see that the configuration item can be dynamically configured (`IsMutable` is true). And it is the unique configuration of Master FE. Then first we can connect to any FE and execute the following command to modify the configuration: - - ``` - ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true"); ` - ``` - - Afterwards, you can view the modified value with the following command: - - ``` - set forward_to_master = true; - ADMIN SHOW FRONTEND CONFIG; - ``` - - After modification in the above manner, if the Master FE restarts or a Master election is performed, the configuration will be invalid. You can add the configuration item directly in `fe.conf` and restart the FE to make the configuration item permanent. - -3. Modify `max_distribution_pruner_recursion_depth` - - Through `ADMIN SHOW FRONTEND CONFIG;` you can see that the configuration item can be dynamically configured (`IsMutable` is true). It is not unique to Master FE. - - Similarly, we can modify the configuration by dynamically modifying the configuration command. Because this configuration is not unique to the Master FE, user need to connect to different FEs separately to modify the configuration dynamically, so that all FEs use the modified configuration values. 
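Following the examples above, a minimal `fe.conf` sketch that makes both changes permanent across restarts; the values are taken directly from the examples and are only illustrative.

```
# Example 1: static item, takes effect after an FE restart
async_pending_load_task_pool_size = 20

# Example 2: persist the dynamically modified value so it survives
# an FE restart or a Master re-election
dynamic_partition_enable = true
```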
- -## Configurations - -### max_dynamic_partition_num - -Default: 500 - -IsMutable: true - -MasterOnly: true - -Used to limit the maximum number of partitions that can be created when creating a dynamic partition table, to avoid creating too many partitions at one time. The number is determined by "start" and "end" in the dynamic partition parameters.. - -### grpc_max_message_size_bytes - -Default: 1G - -Used to set the initial flow window size of the GRPC client channel, and also used to max message size. When the result set is large, you may need to increase this value. - -### min_replication_num_per_tablet - -Default: 1 - -Used to set minimal number of replication per tablet. - -### max_replication_num_per_tablet - -Default: 32767 - -Used to set maximal number of replication per tablet. - -### enable_outfile_to_local - -Default: false - -Whether to allow the outfile function to export the results to the local disk. - -### enable_access_file_without_broker - -Default: false - -IsMutable: true - -MasterOnly: true - -This config is used to try skip broker when access bos or other cloud storage via broker - -### enable_bdbje_debug_mode - -Default: false - -If set to true, FE will be started in BDBJE debug mode - -### enable_alpha_rowset - -Default: false - -Whether to support the creation of alpha rowset tables. The default is false and should only be used in emergency situations, this config should be remove in some future version - -### enable_http_server_v2 - -Default: The default is true after the official 0.14.0 version is released, and the default is false before - -HTTP Server V2 is implemented by SpringBoot. It uses an architecture that separates the front and back ends. Only when httpv2 is enabled can users use the new front-end UI interface. - -### jetty_server_acceptors - -Default: 2 - -### jetty_server_selectors - -Default: 4 - -### jetty_server_workers - -Default: 0 - -With the above three parameters, Jetty's thread architecture model is very simple, divided into acceptors, selectors and workers three thread pools. Acceptors are responsible for accepting new connections, and then hand them over to selectors to process the unpacking of the HTTP message protocol, and finally workers process the request. The first two thread pools adopt a non-blocking model, and one thread can handle the read and write of many sockets, so the number of thread pools is small. - -For most projects, only 4 acceptors threads are required, and 2 to 4 selectors threads are sufficient. Workers are obstructive business logic, often have more database operations, and require a large number of threads. The specific number depends on the proportion of QPS and IO events of the application. The higher the QPS, the more threads are required, the higher the proportion of IO, the more threads waiting, and the more total threads required. - -Worker thread pool is not set by default, set according to your needs - -### jetty_threadPool_minThreads - -The minimum number of threads in the Jetty thread pool, the default is 20 - -### jetty_threadPool_maxThreads - -The maximum number of threads in the Jetty thread pool, the default is 400 - -### jetty_server_max_http_post_size - -Default: 100 * 1024 * 1024 (100MB) - -This is the maximum number of bytes of the file uploaded by the put or post method, the default value: 100MB - -### **`disable_mini_load`** - -Whether to disable the mini load data import method, the default: true (Disabled) - -### frontend_address - -Status: Deprecated, not recommended use. 
This parameter may be deleted later Type: string Description: Explicitly set the IP address of FE instead of using *InetAddress.getByName* to get the IP address. Usually in *InetAddress.getByName* When the expected results cannot be obtained. Only IP address is supported, not hostname. Default value: 0.0.0.0 - -### default_max_filter_ratio - -Default: 0 - -IsMutable: true - -MasterOnly: true - -Maximum percentage of data that can be filtered (due to reasons such as data is irregularly) , The default value is 0. - -### default_db_data_quota_bytes - -Default: 1PB - -IsMutable: true - -MasterOnly: true - -Used to set the default database data quota size. To set the quota size of a single database, you can use: - -``` -Set the database data quota, the unit is:B/K/KB/M/MB/G/GB/T/TB/P/PB -ALTER DATABASE db_name SET DATA QUOTA quota; -View configuration -show data (Detail: HELP SHOW DATA) -``` - -### default_db_replica_quota_size - -Default: 1073741824 - -IsMutable: true - -MasterOnly: true - -Used to set the default database replica quota. To set the quota size of a single database, you can use: - -``` -Set the database replica quota -ALTER DATABASE db_name SET REPLICA QUOTA quota; -View configuration -show data (Detail: HELP SHOW DATA) -``` - -### enable_batch_delete_by_default - -Default: false - -IsMutable: true - -MasterOnly: true - -Whether to add a delete sign column when create unique table - -### recover_with_empty_tablet - -Default: false - -IsMutable: true - -MasterOnly: true - - In some very special circumstances, such as code bugs, or human misoperation, etc., all replicas of some tablets may be lost. In this case, the data has been substantially lost. However, in some scenarios, the business still hopes to ensure that the query will not report errors even if there is data loss, and reduce the perception of the user layer. At this point, we can use the blank Tablet to fill the missing replica to ensure that the query can be executed normally. - -Set to true so that Doris will automatically use blank replicas to fill tablets which all replicas have been damaged or missing - -### max_allowed_in_element_num_of_delete - -Default: 1024 - -IsMutable: true - -MasterOnly: true - -This configuration is used to limit element num of InPredicate in delete statement. - -### cache_result_max_row_count - -Default: 3000 - -IsMutable: true - -MasterOnly: false - -In order to avoid occupying too much memory, the maximum number of rows that can be cached is 2000 by default. If this threshold is exceeded, the cache cannot be set - -### cache_last_version_interval_second - -Default: 900 - -IsMutable: true - -MasterOnly: false - -The time interval of the latest partitioned version of the table refers to the time interval between the data update and the current version. It is generally set to 900 seconds, which distinguishes offline and real-time import - -### cache_enable_partition_mode - -Default: true - -IsMutable: true - -MasterOnly: false - -When this switch is turned on, the query result set will be cached according to the partition. If the interval between the query table partition time and the query time is less than cache_last_version_interval_second, the result set will be cached according to the partition. - -Part of the data will be obtained from the cache and some data from the disk when querying, and the data will be merged and returned to the client. 
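Putting the partition-cache items above together, a hedged `fe.conf` sketch with the documented defaults; how high `cache_last_version_interval_second` should be set depends on how strictly offline the data updates really are.

```
# Cache query result sets by partition
cache_enable_partition_mode = true

# Only cache partitions whose latest version is older than this many seconds
cache_last_version_interval_second = 900

# Do not cache result sets larger than this many rows
cache_result_max_row_count = 3000
```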
- -### cache_enable_sql_mode - -Default: true - -IsMutable: true - -MasterOnly: false - -If this switch is turned on, the SQL query result set will be cached. If the interval between the last visit version time in all partitions of all tables in the query is greater than cache_last_version_interval_second, and the result set is less than cache_result_max_row_count, the result set will be cached, and the next same SQL will hit the cache - -If set to true, fe will enable sql result caching. This option is suitable for offline data update scenarios - -| | case1 | case2 | case3 | case4 | -| ---------------------- | ----- | ----- | ----- | ----- | -| enable_sql_cache | false | true | true | false | -| enable_partition_cache | false | false | true | true | - -### min_clone_task_timeout_sec 和 max_clone_task_timeout_sec - -Default: Minimum 3 minutes, maximum two hours - -IsMutable: true - -MasterOnly: true - -Type: long Description: Used to control the maximum timeout of a clone task. The unit is second. Default value: 7200 Dynamic modification: yes - -Can cooperate with `mix_clone_task_timeout_sec` to control the maximum and minimum timeout of a clone task. Under normal circumstances, the timeout of a clone task is estimated by the amount of data and the minimum transfer rate (5MB/s). In some special cases, these two configurations can be used to set the upper and lower bounds of the clone task timeout to ensure that the clone task can be completed successfully. - -### agent_task_resend_wait_time_ms - -Default: 5000 - -IsMutable: true - -MasterOnly: true - -This configuration will decide whether to resend agent task when create_time for agent_task is set, only when current_time - create_time > agent_task_resend_wait_time_ms can ReportHandler do resend agent task. - -This configuration is currently mainly used to solve the problem of repeated sending of `PUBLISH_VERSION` agent tasks. The current default value of this configuration is 5000, which is an experimental value. - -Because there is a certain time delay between submitting agent tasks to AgentTaskQueue and submitting to be, Increasing the value of this configuration can effectively solve the problem of repeated sending of agent tasks, - -But at the same time, it will cause the submission of failed or failed execution of the agent task to be executed again for an extended period of time - -### enable_odbc_table - -Default: false - -IsMutable: true - -MasterOnly: true - -Whether to enable the ODBC table, it is not enabled by default. You need to manually configure it when you use it. This parameter can be set by: ADMIN SET FRONTEND CONFIG("key"="value") - -### enable_spark_load - -Default: false - -IsMutable: true - -MasterOnly: true - -Whether to enable spark load temporarily, it is not enabled by default - -### disable_storage_medium_check - -Default: false - -IsMutable: true - -MasterOnly: true - -If disable_storage_medium_check is true, ReportHandler would not check tablet's storage medium and disable storage cool down function, the default value is false. You can set the value true when you don't care what the storage medium of the tablet is. - -### drop_backend_after_decommission - -Default: false - -IsMutable: true - -MasterOnly: true - -1. This configuration is used to control whether the system drops the BE after successfully decommissioning the BE. If true, the BE node will be deleted after the BE is successfully offline. 
If false, after the BE successfully goes offline, the BE will remain in the DECOMMISSION state, but will not be dropped. - - This configuration can play a role in certain scenarios. Assume that the initial state of a Doris cluster is one disk per BE node. After running for a period of time, the system has been vertically expanded, that is, each BE node adds 2 new disks. Because Doris currently does not support data balancing among the disks within the BE, the data volume of the initial disk may always be much higher than the data volume of the newly added disk. At this time, we can perform manual inter-disk balancing by the following operations: - - 1. Set the configuration item to false. - 2. Perform a decommission operation on a certain BE node. This operation will migrate all data on the BE to other nodes. - 3. After the decommission operation is completed, the BE will not be dropped. At this time, cancel the decommission status of the BE. Then the data will start to balance from other BE nodes back to this node. At this time, the data will be evenly distributed to all disks of the BE. - 4. Perform steps 2 and 3 for all BE nodes in sequence, and finally achieve the purpose of disk balancing for all nodes - -### period_of_auto_resume_min - -Default: 5 (s) - -IsMutable: true - -MasterOnly: true - -Automatically restore the cycle of Routine load - -### max_tolerable_backend_down_num - -Default: 0 - -IsMutable: true - -MasterOnly: true - -As long as one BE is down, Routine Load cannot be automatically restored - -### enable_materialized_view - -Default: true - -IsMutable: true - -MasterOnly: true - -This configuration is used to turn on and off the creation of materialized views. If set to true, the function to create a materialized view is enabled. The user can create a materialized view through the `CREATE MATERIALIZED VIEW` command. If set to false, materialized views cannot be created. - -If you get an error `The materialized view is coming soon` or `The materialized view is disabled` when creating the materialized view, it means that the configuration is set to false and the function of creating the materialized view is turned off. You can start to create a materialized view by modifying the configuration to true. - -This variable is a dynamic configuration, and users can modify the configuration through commands after the FE process starts. 
You can also modify the FE configuration file and restart the FE to take effect - -### check_java_version - -Default: true - -Doris will check whether the compiled and run Java versions are compatible, if not, it will throw a Java version mismatch exception message and terminate the startup - -### max_running_rollup_job_num_per_table - -Default: 1 - -IsMutable: true - -MasterOnly: true - -Control the concurrency limit of Rollup jobs - -### dynamic_partition_enable - -Default: true - -IsMutable: true - -MasterOnly: true - -Whether to enable dynamic partition, enabled by default - -### dynamic_partition_check_interval_seconds - -Default: 600 (s) - -IsMutable: true - -MasterOnly: true - -Decide how often to check dynamic partition - -### disable_cluster_feature - -Default: true - -IsMutable: true - -The multi cluster feature will be deprecated in version 0.12 ,set this config to true will disable all operations related to cluster feature, include: - create/drop cluster - add free backend/add backend to cluster/decommission cluster balance - change the backends num of cluster - link/migration db - -### force_do_metadata_checkpoint - -Default: false - -IsMutable: true - -MasterOnly: true - -If set to true, the checkpoint thread will make the checkpoint regardless of the jvm memory used percent - -### metadata_checkpoint_memory_threshold - -Default: 60 (60%) - -IsMutable: true - -MasterOnly: true - - If the jvm memory used percent(heap or old mem pool) exceed this threshold, checkpoint thread will not work to avoid OOM. - -### max_distribution_pruner_recursion_depth - -Default: 100 - -IsMutable: true - -MasterOnly: false - -This will limit the max recursion depth of hash distribution pruner. - eg: where a in (5 elements) and b in (4 elements) and c in (3 elements) and d in (2 elements). - a/b/c/d are distribution columns, so the recursion depth will be 5 * 4 * 3 * 2 = 120, larger than 100, - So that distribution pruner will no work and just return all buckets. - Increase the depth can support distribution pruning for more elements, but may cost more CPU. - -### max_backup_restore_job_num_per_db - -Default: 10 - -This configuration is mainly used to control the number of backup/restore tasks recorded in each database. - -### using_old_load_usage_pattern - -Default: false - -IsMutable: true - -MasterOnly: true - -If set to true, the insert stmt with processing error will still return a label to user. And user can use this label to check the load job's status. The default value is false, which means if insert operation encounter errors, exception will be thrown to user client directly without load label. - -### small_file_dir - -Default: DORIS_HOME_DIR/small_files - -Save small files - -### max_small_file_size_bytes - -Default: 1M - -IsMutable: true - -MasterOnly: true - -The max size of a single file store in SmallFileMgr - -### max_small_file_number - -Default: 100 - -IsMutable: true - -MasterOnly: true - -The max number of files store in SmallFileMgr - -### max_routine_load_task_num_per_be - -Default: 5 - -IsMutable: true - -MasterOnly: true - -the max concurrent routine load task num per BE. This is to limit the num of routine load tasks sending to a BE, and it should also less than BE config 'routine_load_thread_pool_size'(default 10), which is the routine load task thread pool size on BE. 
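To restate the constraint above in configuration form, a sketch of the FE side follows, with the related BE setting noted as a comment; both numbers are the documented defaults.

```
# Max concurrent routine load tasks sent to a single BE; should be less than
# the BE config routine_load_thread_pool_size (default 10)
max_routine_load_task_num_per_be = 5
```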
- -### max_routine_load_task_concurrent_num - -Default: 5 - -IsMutable: true - -MasterOnly: true - -The max concurrent routine load task num of a single routine load job - -### max_routine_load_job_num - -Default: 100 - -The max routine load job num, including NEED_SCHEDULED, RUNNING, PAUSE - -### max_running_txn_num_per_db - -Default: 100 - -IsMutable: true - -MasterOnly: true - -This configuration is mainly used to control the number of concurrent load jobs of the same database. - -When there are too many load jobs running in the cluster, the newly submitted load jobs may report errors: - -```text -current running txns on db xxx is xx, larger than limit xx -``` - -When this error is encountered, it means that the number of load jobs currently running in the cluster exceeds the configuration value. At this time, it is recommended to wait on the business side and retry the load jobs. - -Generally it is not recommended to increase this configuration value. An excessively high concurrency may cause excessive system load. - -### enable_metric_calculator - -Default: true - -If set to true, the metric collector will be run as a daemon timer to collect metrics at a fixed interval - -### report_queue_size - -Default: 100 - -IsMutable: true - -MasterOnly: true - - This threshold is to avoid piling up too many report tasks in FE, which may cause an OOM exception. In some large Doris clusters, eg: 100 Backends with ten million replicas, a tablet report may cost several seconds after some modification of metadata (drop partition, etc.). And one Backend will report tablet info every 1 min, so receiving unlimited reports is unacceptable. We will optimize the processing speed of tablet reports in the future, but for now, just discard the report if the queue size exceeds the limit. - Some online time cost: - 1. disk report: 0-1 ms - 2. task report: 0-1 ms - 3. tablet report (10000 replicas): 200ms - -### partition_rebalance_max_moves_num_per_selection - -Default: 10 - -IsMutable: true - -MasterOnly: true - -Valid only when PartitionRebalancer is used - -### partition_rebalance_move_expire_after_access - -Default: 600 (s) - -IsMutable: true - -MasterOnly: true - -Valid only when PartitionRebalancer is used. If this is changed, cached moves will be cleared - -### tablet_rebalancer_type - -Default: BeLoad - -MasterOnly: true - -Rebalancer type (ignore case): BeLoad, Partition. If type parsing fails, BeLoad is used as the default - -### max_balancing_tablets - -Default: 100 - -IsMutable: true - -MasterOnly: true - -If the number of balancing tablets in TabletScheduler exceeds max_balancing_tablets, no more balance checks will be done - -### max_scheduling_tablets - -Default: 2000 - -IsMutable: true - -MasterOnly: true - -If the number of scheduled tablets in TabletScheduler exceeds max_scheduling_tablets, checking will be skipped. - -### disable_balance - -Default: false - -IsMutable: true - -MasterOnly: true - -If set to true, TabletScheduler will not do balance.
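As a hedged example of how these balance-related switches are typically used, the sketch below pauses tablet balancing before maintenance and checks the scheduler state; `SHOW PROC "/cluster_balance"` is assumed to be available in your Doris version:

```
-- temporarily stop TabletScheduler from doing balance (mutable, master only)
ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");
-- inspect pending and running tablets in the tablet scheduler
SHOW PROC "/cluster_balance";
-- re-enable balance afterwards
ADMIN SET FRONTEND CONFIG ("disable_balance" = "false");
```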
- -### balance_load_score_threshold - -Default: 0.1 (10%) - -IsMutable: true - -MasterOnly: true - -The threshold of the cluster balance score. If a backend's load score is 10% lower than the average score, this backend will be marked as LOW load; if its load score is 10% higher than the average score, it will be marked as HIGH load - -### schedule_slot_num_per_path - -Default: 2 - -The default slot number per path in the tablet scheduler. This config is expected to be removed and adjusted dynamically by clone task statistics in the future - -### tablet_repair_delay_factor_second - -Default: 60 (s) - -IsMutable: true - -MasterOnly: true - -The factor of delay time before deciding to repair a tablet. If the priority is VERY_HIGH, repair it immediately. - -- HIGH: delay tablet_repair_delay_factor_second * 1; -- NORMAL: delay tablet_repair_delay_factor_second * 2; -- LOW: delay tablet_repair_delay_factor_second * 3; - -### es_state_sync_interval_second - -Default: 10 - -FE will call the ES API to get ES index shard info every *es_state_sync_interval_second* seconds - -### disable_hadoop_load - -Default: false - -IsMutable: true - -MasterOnly: true - -Load using a hadoop cluster will be deprecated in the future. Set to true to disable this kind of load. - -### db_used_data_quota_update_interval_secs - -Default: 300 (s) - -IsMutable: true - -MasterOnly: true - -For better data load performance, when checking whether the amount of data used by a database exceeds its quota before a data load, we do not calculate the amount of data already used by the database in real time, but obtain a value periodically updated by a daemon thread. - -This configuration is used to set the time interval for updating the value of the amount of data used by the database - -### disable_load_job - -Default: false - -IsMutable: true - -MasterOnly: true - -If this is set to true: - -- all pending load jobs will fail when calling the begin txn api -- all prepare load jobs will fail when calling the commit txn api -- all committed load jobs will wait to be published - -### catalog_try_lock_timeout_ms - -Default: 5000 (ms) - -IsMutable: true - -The tryLock timeout configuration of the catalog lock. Normally it does not need to be changed, unless you need to test something. - -### max_query_retry_time - -Default: 1 - -IsMutable: true - -The number of query retries. A query may retry if we encounter an RPC exception and no result has been sent to the user. You may reduce this number to avoid avalanche disaster - -### remote_fragment_exec_timeout_ms - -Default: 5000 (ms) - -IsMutable: true - -The timeout of executing an async remote fragment. In the normal case, the async remote fragment will be executed in a short time. If the system is under a high load condition, try to set this timeout longer. - -### enable_local_replica_selection - -Default: false - -IsMutable: true - -If set to true, the Planner will try to select a replica of the tablet on the same host as this Frontend. This may reduce network transmission in the following case: - -- N hosts with N Backends and N Frontends deployed. -- The data has N replicas. -- High concurrency queries are sent to all Frontends evenly -- In this case, all Frontends can only use local replicas to do the query. If you want to allow fallback to nonlocal replicas when no local replica is available, set enable_local_replica_selection_fallback to true. - -### enable_local_replica_selection_fallback - -Default: false - -IsMutable: true - -Used with enable_local_replica_selection. If the local replica is not available, fall back to the nonlocal replicas.
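A minimal sketch of enabling local replica selection together with its fallback on one Frontend, assuming the co-located FE/BE deployment described above:

```
ADMIN SET FRONTEND CONFIG ("enable_local_replica_selection" = "true");
-- allow falling back to nonlocal replicas when no local replica is available
ADMIN SET FRONTEND CONFIG ("enable_local_replica_selection_fallback" = "true");
```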
- -### max_unfinished_load_job - -Default: 1000 - -IsMutable: true - -MasterOnly: true - - Max number of load jobs, include PENDING、ETL、LOADING、QUORUM_FINISHED. If exceed this number, load job is not allowed to be submitted - -### max_bytes_per_broker_scanner - -Default: 3 * 1024 * 1024 * 1024L (3G) - -IsMutable: true - -MasterOnly: true - -Max bytes a broker scanner can process in one broker load job. Commonly, each Backends has one broker scanner. - -### enable_auth_check - -Default: true - -if set to false, auth check will be disable, in case some goes wrong with the new privilege system. - -### tablet_stat_update_interval_second - -Default: 300,(5min) - -update interval of tablet stat , All frontends will get tablet stat from all backends at each interval - -### storage_flood_stage_usage_percent - -Default: 95 (95%) - -IsMutable: true - -MasterOnly: true - -### storage_flood_stage_left_capacity_bytes - -Default: - - storage_flood_stage_usage_percent : 95 (95%) - - storage_flood_stage_left_capacity_bytes : 1 * 1024 * 1024 * 1024 (1GB) - -IsMutable: true - -MasterOnly: true - -If capacity of disk reach the 'storage_flood_stage_usage_percent' and 'storage_flood_stage_left_capacity_bytes', the following operation will be rejected: - -1. load job -2. restore job - -### storage_high_watermark_usage_percent - -Default: 85 (85%) - -IsMutable: true - -MasterOnly: true - -### storage_min_left_capacity_bytes - -Default: 2 * 1024 * 1024 * 1024 (2GB) - -IsMutable: true - -MasterOnly: true - - 'storage_high_watermark_usage_percent' limit the max capacity usage percent of a Backend storage path. 'storage_min_left_capacity_bytes' limit the minimum left capacity of a Backend storage path. If both limitations are reached, this storage path can not be chose as tablet balance destination. But for tablet recovery, we may exceed these limit for keeping data integrity as much as possible. - -### backup_job_default_timeout_ms - -Default: 86400 * 1000 (1day) - -IsMutable: true - -MasterOnly: true - -default timeout of backup job - -### with_k8s_certs - -Default: false - -If use k8s deploy manager locally, set this to true and prepare the certs files - -### dpp_hadoop_client_path - -Default: /lib/hadoop-client/hadoop/bin/hadoop - -### dpp_bytes_per_reduce - -Default: 100 * 1024 * 1024L; // 100M - -### dpp_default_cluster - -Default: palo-dpp - -### dpp_default_config_str - -Default: { - hadoop_configs : 'mapred.job.priority=NORMAL;mapred.job.map.capacity=50;mapred.job.reduce.capacity=50;mapred.hce.replace.streaming=false;abaci.long.stored.job=true;dce.shuffle.enable=false;dfs.client.authserver.force_stop=true;dfs.client.auth.method=0' - } - -### dpp_config_str - -Default: { - palo-dpp : { - hadoop_palo_path : '/dir', - hadoop_configs : 'fs.default.name=hdfs://host:port;mapred.job.tracker=host:port;hadoop.job.ugi=user,password' - } - } - -### enable_deploy_manager - -Default: disable - - Set to true if you deploy Palo using thirdparty deploy manager Valid options are: - -- disable: no deploy manager -- k8s: Kubernetes -- ambari: Ambari -- local: Local File (for test or Boxer2 BCC version) - -### enable_token_check - -Default: true - -For forward compatibility, will be removed later. check token when download image file. - -### expr_depth_limit - -Default: 3000 - -IsMutable: true - -Limit on the depth of an expr tree. Exceed this limit may cause long analysis time while holding db read lock. 
Do not change this unless you know what you are doing - -### expr_children_limit - -Default: 10000 - -IsMutable: true - -Limit on the number of expr children of an expr tree. Exceeding this limit may cause long analysis time while holding the database read lock. - -### proxy_auth_magic_prefix - -Default: x@8 - -### proxy_auth_enable - -Default: false - -### meta_publish_timeout_ms - -Default: 1000 (ms) - -The default user resource publishing timeout - -### disable_colocate_balance - -Default: false - -IsMutable: true - -MasterOnly: true - -This config can be set to true to disable the automatic relocation and balancing of colocate tables. If 'disable_colocate_balance' is set to true, ColocateTableBalancer will not relocate and balance colocate tables. - **Attention**: - -- Under normal circumstances, there is no need to turn off balance at all. -- Because once the balance is turned off, an unstable colocate table may not be restored -- Eventually the colocate plan cannot be used when querying. - -### query_colocate_join_memory_limit_penalty_factor - -Default: 1 - -IsMutable: true - -The memory_limit of a colocate join PlanFragment instance = exec_mem_limit / min(query_colocate_join_memory_limit_penalty_factor, instance_num) - -### max_connection_scheduler_threads_num - -Default: 4096 - -Maximal number of threads in the connection-scheduler-pool. - -### qe_max_connection - -Default: 1024 - -Maximal number of connections per FE. - -### check_consistency_default_timeout_second - -Default: 600 (10 minutes) - -IsMutable: true - -MasterOnly: true - -Default timeout of a single consistency check task. Set it long enough to fit your tablet size - -### consistency_check_start_time - -Default: 23 - -IsMutable: true - -MasterOnly: true - -Consistency checker will run from *consistency_check_start_time* to *consistency_check_end_time*. Default is from 23:00 to 04:00 - -### consistency_check_end_time - -Default: 04 - -IsMutable: true - -MasterOnly: true - -Consistency checker will run from *consistency_check_start_time* to *consistency_check_end_time*. Default is from 23:00 to 04:00 - -### export_tablet_num_per_task - -Default: 5 - -IsMutable: true - -MasterOnly: true - -Number of tablets per export query plan - -### export_task_default_timeout_second - -Default: 2 * 3600 (2 hours) - -IsMutable: true - -MasterOnly: true - -Default timeout of export jobs. - -### export_running_job_num_limit - -Default: 5 - -IsMutable: true - -MasterOnly: true - -Limitation of the concurrency of running export jobs. Default is 5. 0 is unlimited - -### export_checker_interval_second - -Default: 5 - -Export checker's running interval. - -### default_load_parallelism - -Default: 1 - -IsMutable: true - -MasterOnly: true - -Default parallelism of the broker load execution plan on a single node. -If the user sets the parallelism when the broker load is submitted, this parameter will be ignored. - -### max_broker_concurrency - -Default: 10 - -IsMutable: true - -MasterOnly: true - -Maximal concurrency of broker scanners. - -### min_bytes_per_broker_scanner - -Default: 67108864L (64M) - -IsMutable: true - -MasterOnly: true - -Minimum bytes that a single broker scanner will read. - -### catalog_trash_expire_second - -Default: 86400L (1 day) - -IsMutable: true - -MasterOnly: true - -After dropping a database (table/partition), you can recover it by using the RECOVER stmt. This specifies the maximal data retention time. After this time, the data will be deleted permanently.
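To illustrate `catalog_trash_expire_second`, the following sketch recovers objects that were dropped within the retention window; `example_db`, `example_tbl` and `p20220428` are hypothetical names:

```
RECOVER DATABASE example_db;
RECOVER TABLE example_db.example_tbl;
RECOVER PARTITION p20220428 FROM example_db.example_tbl;
```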
- -### storage_cooldown_second - -Default: 30 * 24 * 3600L (30day) - -When create a table(or partition), you can specify its storage medium(HDD or SSD). If set to SSD, this specifies the default duration that tablets will stay on SSD. After that, tablets will be moved to HDD automatically. You can set storage cooldown time in CREATE TABLE stmt. - -### default_storage_medium - -Default: HDD - -When create a table(or partition), you can specify its storage medium(HDD or SSD). If not set, this specifies the default medium when creat. - -### max_backend_down_time_second - -Default: 3600 (1hour) - -IsMutable: true - -MasterOnly: true - -If a backend is down for *max_backend_down_time_second*, a BACKEND_DOWN event will be triggered. - -### alter_table_timeout_second - -Default: 86400 (1day) - -IsMutable: true - -MasterOnly: true - -Maximal timeout of ALTER TABLE request. Set long enough to fit your table data size. - -### capacity_used_percent_high_water - -Default: 0.75 (75%) - -IsMutable: true - -MasterOnly: true - -The high water of disk capacity used percent. This is used for calculating load score of a backend - -### clone_distribution_balance_threshold - -Default: 0.2 - -IsMutable: true - -MasterOnly: true - -Balance threshold of num of replicas in Backends. - -### clone_capacity_balance_threshold - -Default: 0.2 - -IsMutable: true - -MasterOnly: true - -Balance threshold of data size in BE. - The balance algorithm is: - -1. Calculate the average used capacity(AUC) of the entire cluster. (total data size / total backends num) -2. The high water level is (AUC * (1 + clone_capacity_balance_threshold)) -3. The low water level is (AUC * (1 - clone_capacity_balance_threshold)) -4. The Clone checker will try to move replica from high water level BE to low water level BE. - -### replica_delay_recovery_second - -Default: 0 - -IsMutable: true - -MasterOnly: true - -the minimal delay seconds between a replica is failed and fe try to recovery it using clone. - -### clone_high_priority_delay_second - -Default: 0 - -IsMutable: true - -MasterOnly: true - -HIGH priority clone job's delay trigger time. - -### clone_normal_priority_delay_second - -Default: 300 (5min) - -IsMutable: true - -MasterOnly: true - -NORMAL priority clone job's delay trigger time - -### clone_low_priority_delay_second - -Default: 600 (10min) - -IsMutable: true - -MasterOnly: true - -LOW priority clone job's delay trigger time. A clone job contains a tablet which need to be cloned(recovery or migration). If the priority is LOW, it will be delayed *clone_low_priority_delay_second* after the job creation and then be executed. This is to avoid a large number of clone jobs running at same time only because a host is down for a short time. - **NOTICE** that this config(and *clone_normal_priority_delay_second* as well) will not work if it's smaller then *clone_checker_interval_second* - -### clone_max_job_num - -Default: 100 - -IsMutable: true - -MasterOnly: true - -Concurrency of LOW priority clone jobs. Concurrency of High priority clone jobs is currently unlimited. - -### clone_job_timeout_second - -Default: 7200 (2小时) - -IsMutable: true - -MasterOnly: true - -Default timeout of a single clone job. Set long enough to fit your replica size. 
The larger the replica data size is, the more time it will cost to finish the clone - -### clone_checker_interval_second - -Default: 300 (5min) - -Clone checker's running interval - -### tablet_delete_timeout_second - -Default: 2 - -IsMutable: true - -MasterOnly: true - -Same meaning as *tablet_create_timeout_second*, but used when deleting a tablet. - -### async_loading_load_task_pool_size - -Default: 10 - -IsMutable: false - -MasterOnly: true - -The loading_load task executor pool size. This pool size limits the max running loading_load tasks. - -Currently, it only limits the loading_load task of broker load - -### async_pending_load_task_pool_size - -Default: 10 - -IsMutable: false - -MasterOnly: true - -The pending_load task executor pool size. This pool size limits the max running pending_load tasks. - -Currently, it only limits the pending_load task of broker load and spark load. - -It should be less than 'max_running_txn_num_per_db' - -### async_load_task_pool_size - -Default: 10 - -IsMutable: false - -MasterOnly: true - -This configuration is just for compatibility with old versions. This config has been replaced by async_loading_load_task_pool_size and will be removed in the future. - -### disable_show_stream_load - -Default: false - -IsMutable: true - -MasterOnly: true - -Whether to disable show stream load and clear stream load records in memory. - -### max_stream_load_record_size - -Default: 5000 - -IsMutable: true - -MasterOnly: true - -Default max number of recent stream load records that can be stored in memory. - -### fetch_stream_load_record_interval_second - -Default: 120 - -IsMutable: true - -MasterOnly: true - -Fetch stream load record interval. - -### desired_max_waiting_jobs - -Default: 100 - -IsMutable: true - -MasterOnly: true - -Default number of waiting jobs for routine load and version 2 of load. This is a desired number. In some situations, such as switching the master, the current number may be more than desired_max_waiting_jobs. - -### yarn_config_dir - -Default: PaloFe.DORIS_HOME_DIR + "/lib/yarn-config" - -Default yarn config file directory. Each time before running the yarn command, we need to check that the config file exists under this path, and if not, create it. - - -### yarn_client_path - -Default: DORIS_HOME_DIR + "/lib/yarn-client/hadoop/bin/yarn" - -Default yarn client path - -### spark_launcher_log_dir - -Default: sys_log_dir + "/spark_launcher_log" - -The specified spark launcher log dir - -### spark_resource_path - -Default: none - -Default spark dependencies path - -### spark_home_default_dir - -Default: DORIS_HOME_DIR + "/lib/spark2x" - -Default spark home dir - -### spark_load_default_timeout_second - -Default: 86400 (1 day) - -IsMutable: true - -MasterOnly: true - -Default spark load timeout - -### spark_dpp_version - -Default: 1.0.0 - -Default spark dpp version - -### hadoop_load_default_timeout_second - -Default: 86400 * 3 (3 days) - -IsMutable: true - -MasterOnly: true - -Default hadoop load timeout - -### min_load_timeout_second - -Default: 1 (1s) - -IsMutable: true - -MasterOnly: true - -Min stream load timeout applicable to all types of load - -### max_stream_load_timeout_second - -Default: 259200 (3 days) - -IsMutable: true - -MasterOnly: true - -This configuration is specifically used to limit the timeout setting for stream load.
It is to prevent that failed stream load transactions cannot be canceled within a short time because of the user's large timeout setting - -### max_load_timeout_second - -Default: 259200 (3天) - -IsMutable: true - -MasterOnly: true - -Max load timeout applicable to all type of load except for stream load - -### stream_load_default_timeout_second - -Default: 600 (s) - -IsMutable: true - -MasterOnly: true - -Default stream load and streaming mini load timeout - -### insert_load_default_timeout_second - -Default: 3600 (1 hour) - -IsMutable: true - -MasterOnly: true - -Default insert load timeout - -### mini_load_default_timeout_second - -Default: 3600 (1 hour) - -IsMutable: true - -MasterOnly: true - -Default non-streaming mini load timeout - -### broker_load_default_timeout_second - -Default: 14400 (4 hour) - -IsMutable: true - -MasterOnly: true - -Default broker load timeout - -### load_running_job_num_limit - -Default: 0 - -IsMutable: true - -MasterOnly: true - -The number of loading tasks is limited, the default is 0, no limit - -### load_input_size_limit_gb - -Default: 0 - -IsMutable: true - -MasterOnly: true - -The size of the data entered by the Load job, the default is 0, unlimited - -### delete_thread_num - -Default: 10 - -Concurrency of delete jobs. - -### load_etl_thread_num_normal_priority - -Default: 10 - -Concurrency of NORMAL priority etl load jobs. Do not change this if you know what you are doing. - -### load_etl_thread_num_high_priority - -Default: 3 - -Concurrency of HIGH priority etl load jobs. Do not change this if you know what you are doing - -### load_pending_thread_num_normal_priority - -Default: 10 - -Concurrency of NORMAL priority pending load jobs. Do not change this if you know what you are doing. - -### load_pending_thread_num_high_priority - -Default: 3 - - Concurrency of HIGH priority pending load jobs. Load job priority is defined as HIGH or NORMAL. All mini batch load jobs are HIGH priority, other types of load jobs are NORMAL priority. Priority is set to avoid that a slow load job occupies a thread for a long time. This is just a internal optimized scheduling policy. Currently, you can not specified the job priority manually, and do not change this if you know what you are doing. - -### load_checker_interval_second - -Default: 5 (s) - -The load scheduler running interval. A load job will transfer its state from PENDING to LOADING to FINISHED. The load scheduler will transfer load job from PENDING to LOADING while the txn callback will transfer load job from LOADING to FINISHED. So a load job will cost at most one interval to finish when the concurrency has not reached the upper limit. - -### max_layout_length_per_row - -Default: 100000 - -IsMutable: true - -MasterOnly: true - -Maximal memory layout length of a row. default is 100 KB. In BE, the maximal size of a RowBlock is 100MB(Configure as max_unpacked_row_block_size in be.conf). And each RowBlock contains 1024 rows. So the maximal size of a row is approximately 100 KB. - eg. - schema: k1(int), v1(decimal), v2(varchar(2000)) - then the memory layout length of a row is: 4(int) + 16(decimal) + 2000(varchar) = 2020 (Bytes) - See memory layout length of all types, run 'help create table' in mysql-client. - If you want to increase this number to support more columns in a row, you also need to increase the - max_unpacked_row_block_size in be.conf. But the performance impact is unknown. 
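As a worked illustration of the layout arithmetic above, here is a hypothetical table whose row layout length stays well under the 100 KB default; the database, table name and replication setting are assumptions, and the per-column byte sizes are taken from this page:

```
CREATE TABLE example_db.layout_demo
(
    k1 INT,             -- 4 bytes in the memory layout
    v1 DECIMAL(27, 9),  -- 16 bytes
    v2 VARCHAR(2000)    -- 2000 bytes
)
DISTRIBUTED BY HASH(k1) BUCKETS 8
PROPERTIES ("replication_num" = "1");
-- total layout length: 4 + 16 + 2000 = 2020 bytes, far below max_layout_length_per_row (100000)
```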
- -### load_straggler_wait_second - -Default: 300 - -IsMutable: true - -MasterOnly: true - -Maximal wait seconds for a straggler node in load - eg. - there are 3 replicas A, B, C - load is already quorum finished (A, B) at t1 and C is not finished - if (current_time - t1) > 300s, then palo will treat C as a failed node, - call the transaction manager to commit the transaction and tell the transaction manager - that C failed - - This is also used when waiting for publish tasks - this parameter is the default value for all jobs and the DBA could specify it for a separate job - -### thrift_server_max_worker_threads - -Default: 4096 - -The thrift server max worker threads - -### publish_version_interval_ms - -Default: 10 (ms) - -Minimal interval between two publish version actions - -### publish_version_timeout_second - -Default: 30 (s) - -IsMutable: true - -MasterOnly: true - -Maximal waiting time for all publish version tasks of one transaction to be finished - -### max_create_table_timeout_second - -Default: 60 (s) - -IsMutable: true - -MasterOnly: true - -In order not to wait too long for create table(index), set a max timeout. - -### tablet_create_timeout_second - -Default: 1 (s) - -IsMutable: true - -MasterOnly: true - -Maximal waiting time for creating a single replica. - eg. - if you create a table with #m tablets and #n replicas for each tablet, - the create table request will run at most (m * n * tablet_create_timeout_second) before timeout. - -### max_mysql_service_task_threads_num - -Default: 4096 - -When FE starts the MySQL server based on the NIO model, the number of threads responsible for Task events. Takes effect only when `mysql_service_nio_enabled` is true. - -### rewrite_count_distinct_to_bitmap_hll - -Default: true - -This variable is a session variable, and it takes effect at the session level. - -- Type: boolean -- Description: **Only for tables of the AGG model**. When the variable is true and the user query contains aggregate functions such as count(distinct c1): if the type of the c1 column is bitmap, count distinct will be rewritten as bitmap_union_count(c1); if the type of the c1 column is hll, count distinct will be rewritten as hll_union_agg(c1). If the variable is false, no rewriting occurs. - -### cluster_id - -Default: -1 - -Nodes (FE or BE) will be considered as belonging to the same Palo cluster if they have the same cluster id. The cluster id is usually a random integer generated when the master FE starts for the first time. You can also specify one. - -### auth_token - -Default: empty - -Cluster token used for internal authentication. - -### cluster_name - -Default: Apache doris - -Cluster name will be shown as the title of the web page - -### mysql_service_io_threads_num - -Default: 4 - -When FE starts the MySQL server based on the NIO model, the number of threads responsible for IO events. Takes effect only when `mysql_service_nio_enabled` is true. - -### mysql_service_nio_enabled - -Default: true - -Whether FE starts the MySQL server based on the NIO model. It is recommended to turn off this option when the number of query connections is less than 1000 or the concurrency scenario is not high - -### query_port - -Default: 9030 - -FE MySQL server port - -### rpc_port - -Default: 9020 - -FE Thrift Server port - -### thrift_server_type - -This configuration represents the service model used by the Thrift service of FE. It is of type String and is case-insensitive. - -If this parameter is 'SIMPLE', then the 'TSimpleServer' model is used, which is generally not suitable for production and is limited to test use.
- -If the parameter is 'THREADED', then the 'TThreadedSelectorServer' model is used, which is a non-blocking I/O model, namely the master-slave Reactor model, which can timely respond to a large number of concurrent connection requests and performs well in most scenarios. - -If this parameter is `THREAD_POOL`, then the `TThreadPoolServer` model is used, the model for blocking I/O model, use the thread pool to handle user connections, the number of simultaneous connections are limited by the number of thread pool, if we can estimate the number of concurrent requests in advance, and tolerant enough thread resources cost, this model will have a better performance, the service model is used by default - -### thrift_backlog_num - -Default: 1024 - -The backlog_num for thrift server , When you enlarge this backlog_num, you should ensure it's value larger than the linux /proc/sys/net/core/somaxconn config - -### thrift_client_timeout_ms - -Default: 0 - -The connection timeout and socket timeout config for thrift server. - -The value for thrift_client_timeout_ms is set to be larger than zero to prevent some hang up problems in java.net.SocketInputStream.socketRead0 - -### mysql_nio_backlog_num - -Default: 1024 - -The backlog_num for mysql nio server, When you enlarge this backlog_num, you should enlarge the value in the linux /proc/sys/net/core/somaxconn file at the same time - -### http_backlog_num - -Default: 1024 - -The backlog_num for netty http server, When you enlarge this backlog_num, you should enlarge the value in the linux /proc/sys/net/core/somaxconn file at the same time - -### http_max_line_length - -Default: 4096 - -The max length of an HTTP URL. The unit of this configuration is BYTE. Defaults to 4096. - -### http_max_header_size - -Default: 8192 - -The max size of allowed HTTP headers. The unit of this configuration is BYTE. Defaults to 8192. - -### http_max_chunk_size - -Default: 8192 - -### http_port - -Default: 8030 - -HTTP bind port. Defaults to 8030 - -### http_api_extra_base_path - -In some deployment environments, user need to specify an additional base path as the unified prefix of the HTTP API. This parameter is used by the user to specify additional prefixes. After setting, user can get the parameter value through the `GET /api/basepath` interface. And the new UI will also try to get this base path first to assemble the URL. Only valid when `enable_http_server_v2` is true. - -The default is empty, that is, not set - -### max_bdbje_clock_delta_ms - -Default: 5000 (5s) - -Set the maximum acceptable clock skew between non-master FE to Master FE host. This value is checked whenever a non-master FE establishes a connection to master FE via BDBJE. The connection is abandoned if the clock skew is larger than this value. - -### ignore_meta_check - -Default: false - -IsMutable: true - -If true, non-master FE will ignore the meta data delay gap between Master FE and its self, even if the metadata delay gap exceeds *meta_delay_toleration_second*. Non-master FE will still offer read service. -This is helpful when you try to stop the Master FE for a relatively long time for some reason, but still wish the non-master FE can offer read service. - -### metadata_failure_recovery - -Default: false - -If true, FE will reset bdbje replication group(that is, to remove all electable nodes info) and is supposed to start as Master. If all the electable nodes can not start, we can copy the meta data to another node and set this config to true to try to restart the FE.. 
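When tuning the metadata-related settings above, it can help to first check each Frontend's role and replay progress; a sketch using standard statements (the LIKE pattern is just an example):

```
-- shows each FE, whether it is the master, and its replayed journal id
SHOW FRONTENDS;
-- check the current value of a related config on the connected FE
ADMIN SHOW FRONTEND CONFIG LIKE "max_bdbje_clock_delta_ms";
```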
- -### priority_networks - -Default: none - -Declare a selection strategy for servers that have many IPs. Note that at most one IP should match this list. This is a semicolon-delimited list in CIDR notation, e.g. 10.10.10.0/24. If no IP matches this rule, one will be chosen randomly. - -### txn_rollback_limit - -Default: 100 - -The max txn number which bdbje can roll back when trying to rejoin the group - -### max_agent_task_threads_num - -Default: 4096 - -MasterOnly: true - -Max num of threads to handle agent tasks in the agent task thread-pool. - -### heartbeat_mgr_blocking_queue_size - -Default: 1024 - -MasterOnly: true - -Blocking queue size to store heartbeat tasks in heartbeat_mgr. - -### heartbeat_mgr_threads_num - -Default: 8 - -MasterOnly: true - -Num of threads to handle heartbeat events in heartbeat_mgr. - -### bdbje_replica_ack_timeout_second - -Default: 10 (s) - -The replica ack timeout when writing to bdbje. When writing some relatively large logs, the ack may time out, resulting in log write failure. At this time, you can increase this value appropriately. - -### bdbje_lock_timeout_second - -Default: 1 - -The lock timeout of bdbje operations. If there are many LockTimeoutException in the FE WARN log, you can try to increase this value - -### bdbje_heartbeat_timeout_second - -Default: 30 - -The heartbeat timeout of bdbje between master and follower. The default is 30 seconds, which is the same as the default value in bdbje. If the network is experiencing transient problems, or some unexpectedly long java GC is annoying you, you can try to increase this value to decrease the chance of false timeouts - -### replica_ack_policy - -Default: SIMPLE_MAJORITY - -Options: ALL, NONE, SIMPLE_MAJORITY - -Replica ack policy of bdbje. For more info, see: http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.ReplicaAckPolicy.html - -### replica_sync_policy - -Default: SYNC - -Options: SYNC, NO_SYNC, WRITE_NO_SYNC - -Follower FE sync policy of bdbje. - -### master_sync_policy - -Default: SYNC - -Options: SYNC, NO_SYNC, WRITE_NO_SYNC - -Master FE sync policy of bdbje. If you only deploy one Follower FE, set this to 'SYNC'. If you deploy more than 3 Follower FEs, you can set this and the following 'replica_sync_policy' to WRITE_NO_SYNC. For more info, see: http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.SyncPolicy.html - -### meta_delay_toleration_second - -Default: 300 (5 minutes) - -Non-master FE will stop offering service if the meta data delay gap exceeds *meta_delay_toleration_second* - -### edit_log_roll_num - -Default: 50000 - -IsMutable: true - -MasterOnly: true - -Master FE will save an image every *edit_log_roll_num* meta journals. - -### edit_log_port - -Default: 9010 - -bdbje port - -### edit_log_type - -Default: BDB - -Edit log type. - BDB: write log to bdbje - LOCAL: deprecated. - -### tmp_dir - -Default: PaloFe.DORIS_HOME_DIR + "/temp_dir" - -The temp dir is used to save intermediate results of some processes, such as the backup and restore processes. Files in this dir will be cleaned after these processes are finished. - -### meta_dir - -Default: DORIS_HOME_DIR + "/doris-meta" - -Type: string. Description: Doris meta data will be saved here. The storage of this dir is highly recommended to be: - -- High write performance (SSD) -- Safe (RAID) - -### custom_config_dir - -Default: PaloFe.DORIS_HOME_DIR + "/conf" - -Configure the location of the `fe_custom.conf` file. The default is in the `conf/` directory.
- -In some deployment environments, the `conf/` directory may be overwritten due to system upgrades. This will cause the user-modified configuration items to be overwritten. At this time, we can store `fe_custom.conf` in another specified directory to prevent the configuration file from being overwritten. - -### log_roll_size_mb - -Default: 1024 (1G) - -The max size of one sys log and audit log - -### sys_log_dir - -Default: PaloFe.DORIS_HOME_DIR + "/log" - -sys_log_dir: - This specifies the FE log dir. FE will produce 2 log files: - fe.log: all logs of the FE process. - fe.warn.log: all WARNING and ERROR logs of the FE process. - -### sys_log_level - -Default: INFO - -log level: INFO, WARNING, ERROR, FATAL - -### sys_log_roll_num - -Default: 10 - -Maximal FE log files to be kept within a sys_log_roll_interval. The default is 10, which means there will be at most 10 log files in a day - -### sys_log_verbose_modules - -Default: {} - -Verbose modules. VERBOSE level is implemented by the log4j DEBUG level. - -eg: - sys_log_verbose_modules = org.apache.doris.catalog - This will only print debug logs of files in the package org.apache.doris.catalog and all its sub-packages. - -### sys_log_roll_interval - -Default: DAY - -sys_log_roll_interval: - -- DAY: log suffix is yyyyMMdd -- HOUR: log suffix is yyyyMMddHH - -### sys_log_delete_age - -Default: 7d - -sys_log_delete_age: - The default is 7 days; if a log's last modify time is 7 days ago, it will be deleted. - - Supported formats: - 7d 7 days - 10h 10 hours - 60m 60 minutes - 120s 120 seconds - - -### audit_log_dir - -Default: DORIS_HOME_DIR + "/log" - -audit_log_dir: - This specifies the FE audit log dir. - The audit log fe.audit.log contains all requests with related info such as user, host, cost, status, etc. - -### audit_log_roll_num - -Default: 90 - -Maximal FE audit log files to be kept within an audit_log_roll_interval. - -### audit_log_modules - -Default: {"slow_query", "query", "load", "stream_load"} - -Slow query contains all queries whose cost exceeds *qe_slow_log_ms* - -### qe_slow_log_ms - -Default: 5000 (5 seconds) - -If the response time of a query exceeds this threshold, it will be recorded in the audit log as slow_query. - -### audit_log_roll_interval - -Default: DAY - -DAY: log suffix is yyyyMMdd -HOUR: log suffix is yyyyMMddHH - -### audit_log_delete_age - -Default: 30d - -The default is 30 days; if a log's last modify time is 30 days ago, it will be deleted. - - Supported formats: - 7d 7 days - 10h 10 hours - 60m 60 minutes - 120s 120 seconds - -### plugin_dir - -Default: DORIS_HOME + "/plugins" - -Plugin install directory - -### plugin_enable - -Default: true - -IsMutable: true - -MasterOnly: true - -Whether the plug-in is enabled, enabled by default - -### label_keep_max_second - -Default: 3 * 24 * 3600 (3 days) - -IsMutable: true - -MasterOnly: true - -Labels of finished or cancelled load jobs will be removed after *label_keep_max_second*. The removed labels can be reused. Setting a short time will lower the FE memory usage. (Because all load jobs' info is kept in memory before being removed) - -In the case of highly concurrent writes, if there is a large backlog of jobs and calls to the frontend service fail, check the log. If the metadata write lock is held for too long, you can adjust this value to 12 hours, or even less (e.g. 6 hours) - -### streaming_label_keep_max_second - -Default: 43200 (12 hours) - -IsMutable: true - -MasterOnly: true - -For some high-frequency load work, such as INSERT, STREAMING LOAD, ROUTINE_LOAD_TASK. If the label expires, the completed job or task will be deleted.
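As an example of how label retention shows up in practice, recent load jobs and their labels can be inspected as below; once `label_keep_max_second` elapses the records disappear and the labels can be reused (the label pattern is hypothetical):

```
-- list recent load jobs whose labels match an assumed naming pattern
SHOW LOAD WHERE LABEL LIKE "broker_load_2022%" LIMIT 10;
```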
- -### history_job_keep_max_second - -Default: 7 * 24 * 3600 (7 day) - -IsMutable: true - -MasterOnly: true - -The max keep time of some kind of jobs. like schema change job and rollup job. - -### label_clean_interval_second - -Default: 4 * 3600 (4 hour) - -Load label cleaner will run every *label_clean_interval_second* to clean the outdated jobs. - -### delete_info_keep_max_second - -Default: 3 * 24 * 3600 (3day) - -IsMutable: true - -MasterOnly: false - -Delete all deleteInfo older than *delete_info_keep_max_second* , Setting a shorter time will reduce FE memory usage and image file size. (Because all deleteInfo is stored in memory and image files before being deleted) - -### transaction_clean_interval_second - -Default: 30 - -the transaction will be cleaned after transaction_clean_interval_second seconds if the transaction is visible or aborted we should make this interval as short as possible and each clean cycle as soon as possible - - -### default_max_query_instances - -The default value when user property max_query_instances is equal or less than 0. This config is used to limit the max number of instances for a user. This parameter is less than or equal to 0 means unlimited. - -The default value is -1 - -### use_compact_thrift_rpc - -Default: true - -Whether to use compressed format to send query plan structure. After it is turned on, the size of the query plan structure can be reduced by about 50%, thereby avoiding some "send fragment timeout" errors. -However, in some high-concurrency small query scenarios, the concurrency may be reduced by about 10%. - -### enable_force_drop_redundant_replica - -Default: false - -Dynamically configured: true - -Only for Master FE: true - -If set to true, the system will immediately drop redundant replicas in the tablet scheduling logic. This may cause some load jobs that are writing to the corresponding replica to fail, but it will speed up the balance and repair speed of the tablet. -When there are a large number of replicas waiting to be balanced or repaired in the cluster, you can try to set this config to speed up the balance and repair of replicas at the expense of partial load success rate. - -### repair_slow_replica - -Default: false - -IsMutable: true - -MasterOnly: true - -If set to true, the replica with slower compaction will be automatically detected and migrated to other machines. The detection condition is that the version count of the fastest replica exceeds the value of `min_version_count_indicate_replica_compaction_too_slow`, and the ratio of the version count difference from the fastest replica exceeds the value of `valid_version_count_delta_ratio_between_replicas` - -### colocate_group_relocate_delay_second - -Default: 1800 - -Dynamically configured: true - -Only for Master FE: true - -The relocation of a colocation group may involve a large number of tablets moving within the cluster. Therefore, we should use a more conservative strategy to avoid relocation of colocation groups as much as possible. -Reloaction usually occurs after a BE node goes offline or goes down. This parameter is used to delay the determination of BE node unavailability. The default is 30 minutes, i.e., if a BE node recovers within 30 minutes, relocation of the colocation group will not be triggered. - -### allow_replica_on_same_host - -Default: false - -Dynamically configured: false - -Only for Master FE: false - -Whether to allow multiple replicas of the same tablet to be distributed on the same host. 
This parameter is mainly used for local testing, to facilitate building multiple BEs to test certain multi-replica situations. Do not use it for non-test environments. - -### min_version_count_indicate_replica_compaction_too_slow - -Default: 300 - -Dynamically configured: true - -Only for Master FE: true - -The version count threshold used to judge whether replica compaction is too slow - -### valid_version_count_delta_ratio_between_replicas - -Default: 0.5 - -Dynamically configured: true - -Only for Master FE: true - -The valid ratio threshold of the difference between the version count of the slowest replica and the fastest replica. If `repair_slow_replica` is set to true, it is used to determine whether to repair the slowest replica - -### min_bytes_indicate_replica_too_large - -Default: 2 * 1024 * 1024 * 1024 (2G) - -Dynamically configured: true - -Only for Master FE: true - -The data size threshold used to judge whether replica is too large - -### skip_compaction_slower_replica - -Default: true - -Dynamically configured: true - -Only for Master FE: false - -If set to true, the compaction slower replica will be skipped when select get queryable replicas - -### enable_create_sync_job - -Enable Mysql data synchronization job function. The default is false, this function is turned off - -Default: false - -Is it possible to configure dynamically: true - -Whether it is a configuration item unique to the Master FE node: true - -### sync_commit_interval_second - -The maximum time interval for committing transactions. If there is still data in the channel that has not been submitted after this time, the consumer will notify the channel to submit the transaction. - -Default: 10 (seconds) - -Is it possible to configure dynamically: true - -Whether it is a configuration item unique to the Master FE node: true - -### min_sync_commit_size - -The minimum number of events that must be satisfied to commit a transaction. If the number of events received by Fe is less than it, it will continue to wait for the next batch of data until the time exceeds `sync_commit_interval_second`. The default value is 10000 events. If you want to modify this configuration, please make sure that this value is smaller than the `canal.instance.memory.buffer.size` configuration on the canal side (default 16384), otherwise Fe will try to get the queue length longer than the store before ack More events cause the store queue to block until it times out. - -Default: 10000 - -Is it possible to configure dynamically: true - -Whether it is a configuration item unique to the Master FE node: true - -### min_bytes_sync_commit - -The minimum data size required to commit a transaction. If the data size received by Fe is smaller than it, it will continue to wait for the next batch of data until the time exceeds `sync_commit_interval_second`. The default value is 15MB, if you want to modify this configuration, please make sure this value is less than the product of `canal.instance.memory.buffer.size` and `canal.instance.memory.buffer.memunit` on the canal side (default 16MB), otherwise Before the ack, Fe will try to obtain data that is larger than the store space, causing the store queue to block until it times out. - -Default: 15*1024*1024 (15M) - -Is it possible to configure dynamically: true - -Whether it is a configuration item unique to the Master FE node: true - -### max_bytes_sync_commit - - The maximum number of threads in the data synchronization job thread pool. 
There is only one thread pool in the entire FE, which is used to process all data synchronization tasks in the FE that send data to the BE. The implementation of the thread pool is in the `SyncTaskPool` class. - -Default: 10 - -Is it possible to dynamically configure: false - -Is it a configuration item unique to the Master FE node: false diff --git a/docs/en/administrator-guide/config/user_property.md b/docs/en/administrator-guide/config/user_property.md deleted file mode 100644 index 27a2d3653f..0000000000 --- a/docs/en/administrator-guide/config/user_property.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -{ - "title": "User Property", - "language": "en" -} ---- - - - -# User configuration item - -This document mainly introduces related configuration items at the User level. The configuration of the User level is mainly effective for a single user. Each user can set their own User property. Does not affect each other. - -## View configuration items - -After the FE is started, on the MySQL client, use the following command to view the User configuration items: - -`SHOW PROPERTY [FOR user] [LIKE key pattern]` - -The specific syntax can be queried through the command: `help show property;`. - -## Set configuration items - -After FE is started, on the MySQL client, modify the User configuration items with the following command: - -`SET PROPERTY [FOR'user'] 'key' = 'value' [,'key' ='value']` - -The specific syntax can be queried through the command: `help set property;`. - -User-level configuration items will only take effect for the specified users, and will not affect the configuration of other users. - -## Application examples - -1. Modify the max_user_connections of user Billie - - Use `SHOW PROPERTY FOR 'Billie' LIKE '%max_user_connections%';` to check that the current maximum number of links for Billie users is 100. - - Use `SET PROPERTY FOR 'Billie' 'max_user_connections' = '200';` to modify the current maximum number of connections for Billie users to 200. - -## Configuration item list - -### max_user_connections - - The maximum number of user connections, the default value is 100 In general, this parameter does not need to be changed unless the number of concurrent queries exceeds the default value. - -### max_query_instances - - The maximum number of instances that the user can use at a certain point in time, The default value is -1, negative number means use default_max_query_instances config. - -### resource - -### quota - -### default_load_cluster - -### load_cluster diff --git a/docs/en/administrator-guide/dynamic-partition.md b/docs/en/administrator-guide/dynamic-partition.md deleted file mode 100644 index bf4c1d56ef..0000000000 --- a/docs/en/administrator-guide/dynamic-partition.md +++ /dev/null @@ -1,464 +0,0 @@ ---- -{ - "title": "Dynamic Partition", - "language": "en" -} ---- - - - -# Dynamic Partition - -Dynamic partition is a new feature introduced in Doris version 0.12. It's designed to manage partition's Time-to-Life (TTL), reducing the burden on users. - -At present, the functions of dynamically adding partitions and dynamically deleting partitions are realized. - -Dynamic partitioning is only supported for Range partitions. - -## Noun Interpretation - -* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. -* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. 
- -## Principle - -In some usage scenarios, the user will partition the table according to the day and perform routine tasks regularly every day. At this time, the user needs to manually manage the partition. Otherwise, the data load may fail because the user does not create a partition. This brings additional maintenance costs to the user. - -Through the dynamic partitioning feature, users can set the rules of dynamic partitioning when building tables. FE will start a background thread to create or delete partitions according to the rules specified by the user. Users can also change existing rules at runtime. - -## Usage - -### Establishment of tables - -The rules for dynamic partitioning can be specified when the table is created or modified at runtime. Currently,dynamic partition rules can only be set for partition tables with single partition columns. - -* Specified when creating table - - ``` - CREATE TABLE tbl1 - (...) - PROPERTIES - ( - "dynamic_partition.prop1" = "value1", - "dynamic_partition.prop2" = "value2", - ... - ) - ``` - -* Modify at runtime - - ``` - ALTER TABLE tbl1 SET - ( - "dynamic_partition.prop1" = "value1", - "dynamic_partition.prop2" = "value2", - ... - ) - ``` - -### Dynamic partition rule parameters - -The rules of dynamic partition are prefixed with `dynamic_partition.`: - -* `dynamic_partition.enable` - - Whether to enable the dynamic partition feature. Can be specified as `TRUE` or` FALSE`. If not filled, the default is `TRUE`. If it is `FALSE`, Doris will ignore the dynamic partitioning rules of the table. - -* `dynamic_partition.time_unit` - - The unit for dynamic partition scheduling. Can be specified as `HOUR`,`DAY`,` WEEK`, and `MONTH`, means to create or delete partitions by hour, day, week, and month, respectively. - - When specified as `HOUR`, the suffix format of the dynamically created partition name is `yyyyMMddHH`, for example, `2020032501`. *When the time unit is HOUR, the data type of partition column cannot be DATE.* - - When specified as `DAY`, the suffix format of the dynamically created partition name is `yyyyMMdd`, for example, `20200325`. - - When specified as `WEEK`, the suffix format of the dynamically created partition name is `yyyy_ww`. That is, the week of the year of current date. For example, the suffix of the partition created for `2020-03-25` is `2020_13`, indicating that it is currently the 13th week of 2020. - - When specified as `MONTH`, the suffix format of the dynamically created partition name is `yyyyMM`, for example, `202003`. - -* `dynamic_partition.time_zone` - - The time zone of the dynamic partition, if not filled in, defaults to the time zone of the current machine's system, such as `Asia/Shanghai`, if you want to know the supported TimeZone, you can found in `https://en.wikipedia.org/wiki/List_of_tz_database_time_zones`. - -* `dynamic_partition.start` - - The starting offset of the dynamic partition, usually a negative number. Depending on the `time_unit` attribute, based on the current day (week / month), the partitions with a partition range before this offset will be deleted. If not filled, the default is `-2147483648`, that is, the history partition will not be deleted. - -* `dynamic_partition.end` - -    The end offset of the dynamic partition, usually a positive number. According to the difference of the `time_unit` attribute, the partition of the corresponding range is created in advance based on the current day (week / month). 
- -* `dynamic_partition.prefix` - -    The dynamically created partition name prefix. - -* `dynamic_partition.buckets` - -    The number of buckets corresponding to the dynamically created partitions. - -* `dynamic_partition.replication_num` - - The replication number of dynamic partition.If not filled in, defaults to the number of table's replication number.     - -* `dynamic_partition.start_day_of_week` - -    When `time_unit` is` WEEK`, this parameter is used to specify the starting point of the week. The value ranges from 1 to 7. Where 1 is Monday and 7 is Sunday. The default is 1, which means that every week starts on Monday. -     -* `dynamic_partition.start_day_of_month` - -    When `time_unit` is` MONTH`, this parameter is used to specify the start date of each month. The value ranges from 1 to 28. 1 means the 1st of every month, and 28 means the 28th of every month. The default is 1, which means that every month starts at 1st. The 29, 30 and 31 are not supported at the moment to avoid ambiguity caused by lunar years or months. - -* `dynamic_partition.create_history_partition` - - The default is false. When set to true, Doris will automatically create all partitions, as described in the creation rules below. At the same time, the parameter `max_dynamic_partition_num` of FE will limit the total number of partitions to avoid creating too many partitions at once. When the number of partitions expected to be created is greater than `max_dynamic_partition_num`, the operation will fail. - - When the `start` attribute is not specified, this parameter has no effect. - -* `dynamic_partition.history_partition_num` - - When `create_history_partition` is `true`, this parameter is used to specify the number of history partitions. The default value is -1, which means it is not set. - -* `dynamic_partition.hot_partition_num` - - Specify how many of the latest partitions are hot partitions. For hot partition, the system will automatically set its `storage_medium` parameter to SSD, and set `storage_cooldown_time`. - - `hot_partition_num` is all partitions in the previous n days and in the future. - - - Let us give an example. Suppose today is 2021-05-20, partition by day, and the properties of dynamic partition are set to: hot_partition_num=2, end=3, start=-3. Then the system will automatically create the following partitions, and set the `storage_medium` and `storage_cooldown_time` properties: - - ``` - p20210517: ["2021-05-17", "2021-05-18") storage_medium=HDD storage_cooldown_time=9999-12-31 23:59:59 - p20210518: ["2021-05-18", "2021-05-19") storage_medium=HDD storage_cooldown_time=9999-12-31 23:59:59 - p20210519: ["2021-05-19", "2021-05-20") storage_medium=SSD storage_cooldown_time=2021-05-21 00:00:00 - p20210520: ["2021-05-20", "2021-05-21") storage_medium=SSD storage_cooldown_time=2021-05-22 00:00:00 - p20210521: ["2021-05-21", "2021-05-22") storage_medium=SSD storage_cooldown_time=2021-05-23 00:00:00 - p20210522: ["2021-05-22", "2021-05-23") storage_medium=SSD storage_cooldown_time=2021-05-24 00:00:00 - p20210523: ["2021-05-23", "2021-05-24") storage_medium=SSD storage_cooldown_time=2021-05-25 00:00:00 - ``` - - -* `dynamic_partition.reserved_history_periods` - - The range of reserved history periods. It should be in the form of `[yyyy-MM-dd,yyyy-MM-dd],[...,...]` while the `dynamic_partition.time_unit` is "DAY, WEEK, and MONTH". And it should be in the form of `[yyyy-MM-dd HH:mm:ss,yyyy-MM-dd HH:mm:ss],[...,...]` while the dynamic_partition.time_unit` is "HOUR". And no more spaces expected. 
The default value is `"NULL"`, which means it is not set. - - Let us give an example. Suppose today is 2021-09-06,partitioned by day, and the properties of dynamic partition are set to: - - ```time_unit="DAY/WEEK/MONTH", end=3, start=-3, reserved_history_periods="[2020-06-01,2020-06-20],[2020-10-31,2020-11-15]"```. - - The the system will automatically reserve following partitions in following period : - - ``` - ["2020-06-01","2020-06-20"], - ["2020-10-31","2020-11-15"] - ``` - or - - ```time_unit="HOUR", end=3, start=-3, reserved_history_periods="[2020-06-01 00:00:00,2020-06-01 03:00:00]"```. - - The the system will automatically reserve following partitions in following period : - - ``` - ["2020-06-01 00:00:00","2020-06-01 03:00:00"] - ``` - - Otherwise, every `[...,...]` in `reserved_history_periods` is a couple of properties, and they should be set at the same time. And the first date can't be larger than the second one. - - -#### Create History Partition Rules - -When `create_history_partition` is `true`, i.e. history partition creation is enabled, Doris determines the number of history partitions to be created based on `dynamic_partition.start` and `dynamic_partition.history_partition_num`. - -Assuming the number of history partitions to be created is `expect_create_partition_num`, the number is as follows according to different settings. - -1. `create_history_partition` = `true` - - `dynamic_partition.history_partition_num` is not set, i.e. -1. - `expect_create_partition_num` = `end` - `start`; - - - `dynamic_partition.history_partition_num` is set - `expect_create_partition_num` = `end` - max(`start`, `-histoty_partition_num`); - -2. `create_history_partition` = `false` - No history partition will be created, `expect_create_partition_num` = `end` - 0; - -When `expect_create_partition_num` is greater than `max_dynamic_partition_num` (default 500), creating too many partitions is prohibited. - -**Examples:** - -1. Suppose today is 2021-05-20, partition by day, and the attributes of dynamic partition are set to `create_history_partition=true, end=3, start=-3, history_partition_num=1`, then the system will automatically create the following partitions. - - ``` - p20210519 - p20210520 - p20210521 - p20210522 - p20210523 - ``` - -2. `history_partition_num=5` and keep the rest attributes as in 1, then the system will automatically create the following partitions. - - ``` - p20210517 - p20210518 - p20210519 - p20210520 - p20210521 - p20210522 - p20210523 - ``` - -3. `history_partition_num=-1` i.e., if you do not set the number of history partitions and keep the rest of the attributes as in 1, the system will automatically create the following partitions. - - ``` - p20210517 - p20210518 - p20210519 - p20210520 - p20210521 - p20210522 - p20210523 - ``` - -### Notice - -If some partitions between `dynamic_partition.start` and `dynamic_partition.end` are lost due to some unexpected circumstances when using dynamic partition, the lost partitions between the current time and `dynamic_partition.end` will be recreated, but the lost partitions between `dynamic_partition.start` and the current time will not be recreated. - -### Example - -1. Table `tbl1` partition column k1, type is DATE, create a dynamic partition rule. By day partition, only the partitions of the last 7 days are kept, and the partitions of the next 3 days are created in advance. - - ``` - CREATE TABLE tbl1 - ( - k1 DATE, - ... 
- ) - PARTITION BY RANGE(k1) () - DISTRIBUTED BY HASH(k1) - PROPERTIES - ( - "dynamic_partition.enable" = "true", - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-7", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32" - ); - ``` - - Suppose the current date is 2020-05-29. According to the above rules, tbl1 will produce the following partitions: - - ``` - p20200529: ["2020-05-29", "2020-05-30") - p20200530: ["2020-05-30", "2020-05-31") - p20200531: ["2020-05-31", "2020-06-01") - p20200601: ["2020-06-01", "2020-06-02") - ``` - - On the next day, 2020-05-30, a new partition will be created `p20200602: [" 2020-06-02 "," 2020-06-03 ")` - - On 2020-06-06, because `dynamic_partition.start` is set to 7, the partition 7 days ago will be deleted, that is, the partition `p20200529` will be deleted. - -2. Table tbl1 partition column k1, type is DATETIME, create a dynamic partition rule. Partition by week, only keep the partition of the last 2 weeks, and create the partition of the next 2 weeks in advance. - - ``` - CREATE TABLE tbl1 - ( - k1 DATETIME, - ... - ) - PARTITION BY RANGE(k1) () - DISTRIBUTED BY HASH(k1) - PROPERTIES - ( - "dynamic_partition.enable" = "true", - "dynamic_partition.time_unit" = "WEEK", - "dynamic_partition.start" = "-2", - "dynamic_partition.end" = "2", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "8" - ); - ``` - - Suppose the current date is 2020-05-29, which is the 22nd week of 2020. The default week starts on Monday. Based on the above rules, tbl1 will produce the following partitions: - - ``` - p2020_22: ["2020-05-25 00:00:00", "2020-06-01 00:00:00") - p2020_23: ["2020-06-01 00:00:00", "2020-06-08 00:00:00") - p2020_24: ["2020-06-08 00:00:00", "2020-06-15 00:00:00") - ``` - - The start date of each partition is Monday of the week. At the same time, because the type of the partition column k1 is DATETIME, the partition value will fill the hour, minute and second fields, and all are 0. - - On 2020-06-15, the 25th week, the partition 2 weeks ago will be deleted, ie `p2020_22` will be deleted. - - In the above example, suppose the user specified the start day of the week as `"dynamic_partition.start_day_of_week" = "3"`, that is, set Wednesday as the start of week. The partition is as follows: - - ``` - p2020_22: ["2020-05-27 00:00:00", "2020-06-03 00:00:00") - p2020_23: ["2020-06-03 00:00:00", "2020-06-10 00:00:00") - p2020_24: ["2020-06-10 00:00:00", "2020-06-17 00:00:00") - ``` - - That is, the partition ranges from Wednesday of the current week to Tuesday of the next week. - - * Note: 2019-12-31 and 2020-01-01 are in same week, if the starting date of the partition is 2019-12-31, the partition name is `p2019_53`, if the starting date of the partition is 2020-01 -01, the partition name is `p2020_01`. - -3. Table tbl1 partition column k1, type is DATE, create a dynamic partition rule. Partition by month without deleting historical partitions, and create partitions for the next 2 months in advance. At the same time, set the starting date on the 3rd of each month. - - ``` - CREATE TABLE tbl1 - ( - k1 DATE, - ... - ) - PARTITION BY RANGE(k1) () - DISTRIBUTED BY HASH(k1) - PROPERTIES - ( - "dynamic_partition.enable" = "true", - "dynamic_partition.time_unit" = "MONTH", - "dynamic_partition.end" = "2", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "8", - "dynamic_partition.start_day_of_month" = "3" - ); - ``` - - Suppose the current date is 2020-05-29. 
Based on the above rules, tbl1 will produce the following partitions: - - ``` - p202005: ["2020-05-03", "2020-06-03") - p202006: ["2020-06-03", "2020-07-03") - p202007: ["2020-07-03", "2020-08-03") - ``` - - Because `dynamic_partition.start` is not set, the historical partition will not be deleted. - - Assuming that today is 2020-05-20, and set 28th as the start of each month, the partition range is: - - ``` - p202004: ["2020-04-28", "2020-05-28") - p202005: ["2020-05-28", "2020-06-28") - p202006: ["2020-06-28", "2020-07-28") - ``` - -### Modify Dynamic Partition Properties - -You can modify the properties of the dynamic partition with the following command - -``` -ALTER TABLE tbl1 SET -( - "dynamic_partition.prop1" = "value1", - ... -); -``` - -The modification of certain attributes may cause conflicts. Assume that the partition granularity was DAY and the following partitions have been created: - -``` -p20200519: ["2020-05-19", "2020-05-20") -p20200520: ["2020-05-20", "2020-05-21") -p20200521: ["2020-05-21", "2020-05-22") -``` - -If the partition granularity is changed to MONTH at this time, the system will try to create a partition with the range `["2020-05-01", "2020-06-01")`, and this range conflicts with the existing partition. So it cannot be created. And the partition with the range `["2020-06-01", "2020-07-01")` can be created normally. Therefore, the partition between 2020-05-22 and 2020-05-30 needs to be filled manually. - -### Check Dynamic Partition Table Scheduling Status - -You can further view the scheduling of dynamic partitioned tables by using the following command: - -``` -mysql> SHOW DYNAMIC PARTITION TABLES; -+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+ -| TableName | Enable | TimeUnit | Start | End | Prefix | Buckets | StartOf | LastUpdateTime | LastSchedulerTime | State | LastCreatePartitionMsg | LastDropPartitionMsg | ReservedHistoryPeriods | -+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+ -| d3 | true | WEEK | -3 | 3 | p | 1 | MONDAY | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | [2021-12-01,2021-12-31] | -| d5 | true | DAY | -7 | 3 | p | 32 | N/A | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -| d4 | true | WEEK | -3 | 3 | p | 1 | WEDNESDAY | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -| d6 | true | MONTH | -2147483648 | 2 | p | 8 | 3rd | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -| d2 | true | DAY | -3 | 3 | p | 32 | N/A | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -| d7 | true | MONTH | -2147483648 | 5 | p | 8 | 24th | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+ -7 rows in set (0.02 sec) -``` - -* LastUpdateTime: The last time of modifying dynamic partition properties -* LastSchedulerTime: The last time of performing dynamic partition scheduling -* State: The state of the last execution of dynamic partition scheduling -* LastCreatePartitionMsg: Error message of the last time to dynamically add partition scheduling -* LastDropPartitionMsg: Error message of the last 
execution of dynamic deletion partition scheduling - -## Advanced Operation - -### FE Configuration Item - -* dynamic\_partition\_enable - - Whether to enable Doris's dynamic partition feature. The default value is false, which is off. This parameter only affects the partitioning operation of dynamic partition tables, not normal tables. You can modify the parameters in `fe.conf` and restart FE to take effect. You can also execute the following commands at runtime to take effect: - - MySQL protocol: - - `ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true")` - - HTTP protocol: - - `curl --location-trusted -u username:password -XGET http://fe_host:fe_http_port/api/_set_config?dynamic_partition_enable=true` - - To turn off dynamic partitioning globally, set this parameter to false. - -* dynamic\_partition\_check\_interval\_seconds - - The execution frequency of dynamic partition threads defaults to 3600 (1 hour), that is, scheduling is performed every 1 hour. You can modify the parameters in `fe.conf` and restart FE to take effect. You can also modify the following commands at runtime: - - MySQL protocol: - - `ADMIN SET FRONTEND CONFIG ("dynamic_partition_check_interval_seconds" = "7200")` - - HTTP protocol: - - `curl --location-trusted -u username:password -XGET http://fe_host:fe_http_port/api/_set_config?dynamic_partition_check_interval_seconds=432000` - -### Converting dynamic and manual partition tables to each other - -For a table, dynamic and manual partitioning can be freely converted, but they cannot exist at the same time, there is and only one state. - -#### Converting Manual Partitioning to Dynamic Partitioning - -If a table is not dynamically partitioned when it is created, it can be converted to dynamic partitioning at runtime by modifying the dynamic partitioning properties with `ALTER TABLE`, an example of which can be seen with `HELP ALTER TABLE`. - -When dynamic partitioning feature is enabled, Doris will no longer allow users to manage partitions manually, but will automatically manage partitions based on dynamic partition properties. - -**NOTICE**: If `dynamic_partition.start` is set, historical partitions with a partition range before the start offset of the dynamic partition will be deleted. - -#### Converting Dynamic Partitioning to Manual Partitioning - -The dynamic partitioning feature can be disabled by executing `ALTER TABLE tbl_name SET ("dynamic_partition.enable" = "false") ` and converting it to a manual partition table. - -When dynamic partitioning feature is disabled, Doris will no longer manage partitions automatically, and users will have to create or delete partitions manually by using `ALTER TABLE`. - -## Common problem - -1. After creating the dynamic partition table, it prompts ```Could not create table with dynamic partition when fe config dynamic_partition_enable is false``` - - Because the main switch of dynamic partition, that is, the configuration of FE ```dynamic_partition_enable``` is false, the dynamic partition table cannot be created. - - At this time, please modify the FE configuration file, add a line ```dynamic_partition_enable=true```, and restart FE. Or execute the command ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true") to turn on the dynamic partition switch. 
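-
-As a rough illustration of the "Converting Manual Partitioning to Dynamic Partitioning" section above, the following sketch assumes an existing range-partitioned table `tbl1` with a DATE partition column `k1`; the property values are only examples:
-
-```
-ALTER TABLE tbl1 SET
-(
-    "dynamic_partition.enable" = "true",
-    "dynamic_partition.time_unit" = "DAY",
-    "dynamic_partition.start" = "-7",
-    "dynamic_partition.end" = "3",
-    "dynamic_partition.prefix" = "p",
-    "dynamic_partition.buckets" = "32"
-);
-```
-
-Once the statement takes effect, Doris manages the table's partitions automatically; as noted above, history partitions outside the `start` offset will be dropped.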
diff --git a/docs/en/administrator-guide/export-manual.md b/docs/en/administrator-guide/export-manual.md deleted file mode 100644 index df2dbbb4df..0000000000 --- a/docs/en/administrator-guide/export-manual.md +++ /dev/null @@ -1,198 +0,0 @@ ---- -{ - "title": "Data export", - "language": "en" -} ---- - - - -# Data export - -Export is a function provided by Doris to export data. This function can export user-specified table or partition data in text format to remote storage through Broker process, such as HDFS/BOS. - -This document mainly introduces the basic principles, usage, best practices and precautions of Export. - -## Noun Interpretation - -* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. -* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. -* Broker: Doris can manipulate files for remote storage through the Broker process. -* Tablet: Data fragmentation. A table is divided into multiple data fragments. - -## Principle - -After the user submits an Export job. Doris counts all Tablets involved in this job. These tablets are then grouped to generate a special query plan for each group. The query plan reads the data on the included tablet and then writes the data to the specified path of the remote storage through Broker. It can also be directly exported to the remote storage that supports S3 protocol through S3 protocol. - -The overall mode of dispatch is as follows: - -``` -+--------+ -| Client | -+---+----+ - | 1. Submit Job - | -+---v--------------------+ -| FE | -| | -| +-------------------+ | -| | ExportPendingTask | | -| +-------------------+ | -| | 2. Generate Tasks -| +--------------------+ | -| | ExportExporingTask | | -| +--------------------+ | -| | -| +-----------+ | +----+ +------+ +---------+ -| | QueryPlan +----------------> BE +--->Broker+---> | -| +-----------+ | +----+ +------+ | Remote | -| +-----------+ | +----+ +------+ | Storage | -| | QueryPlan +----------------> BE +--->Broker+---> | -| +-----------+ | +----+ +------+ +---------+ -+------------------------+ 3. Execute Tasks - -``` - -1. The user submits an Export job to FE. -2. FE's Export scheduler performs an Export job in two stages: - 1. PENDING: FE generates Export Pending Task, sends snapshot command to BE, and takes a snapshot of all Tablets involved. And generate multiple query plans. - 2. EXPORTING: FE generates Export ExportingTask and starts executing the query plan. - -### query plan splitting - -The Export job generates multiple query plans, each of which scans a portion of the Tablet. The number of Tablets scanned by each query plan is specified by the FE configuration parameter `export_tablet_num_per_task`, which defaults to 5. That is, assuming a total of 100 Tablets, 20 query plans will be generated. Users can also specify this number by the job attribute `tablet_num_per_task`, when submitting a job. - -Multiple query plans for a job are executed sequentially. - -### Query Plan Execution - -A query plan scans multiple fragments, organizes read data in rows, batches every 1024 actions, and writes Broker to remote storage. - -The query plan will automatically retry three times if it encounters errors. If a query plan fails three retries, the entire job fails. - -Doris will first create a temporary directory named `doris_export_tmp_12345` (where `12345` is the job id) in the specified remote storage path. The exported data is first written to this temporary directory. 
Each query plan generates a file with an example file name: - -`export-data-c69fcf2b6db5420f-a96b94c1ff8bccef-1561453713822` - -Among them, `c69fcf2b6db5420f-a96b94c1ff8bccef` is the query ID of the query plan, and `1561453713822` is the timestamp at which the file was generated. - -When all data is exported, Doris will rename these files to the user-specified path. - -## Usage Examples - -The detailed syntax of the Export command can be viewed through `HELP EXPORT;`. Examples are as follows: - -``` -EXPORT TABLE db1.tbl1 -PARTITION (p1,p2) -[WHERE [expr]] -TO "bos://bj-test-cmy/export/" -PROPERTIES -( - "label"="mylabel", - "column_separator"=",", - "columns" = "col1,col2", - "exec_mem_limit"="2147483648", - "timeout" = "3600" -) -WITH BROKER "hdfs" -( - "username" = "user", - "password" = "passwd" -); -``` - -* `label`: The identifier of this export job. You can use this identifier to view the job status later. -* `column_separator`: Column separator. The default is `\t`. Supports invisible characters, such as '\x07'. -* `columns`: Columns to be exported, separated by commas. If this parameter is not specified, all columns of the table are exported by default. -* `line_delimiter`: Line separator. The default is `\n`. Supports invisible characters, such as '\x07'. -* `exec_mem_limit`: The memory usage limit of a single query plan on a single BE in an Export job. Default is 2GB. Unit: bytes. -* `timeout`: Job timeout. Default is 2 hours. Unit: seconds. -* `tablet_num_per_task`: The maximum number of fragments allocated per query plan. The default is 5. - -After submitting a job, the job status can be viewed through the `SHOW EXPORT` command. The result is as follows: - -``` - JobId: 14008 - Label: mylabel - State: FINISHED - Progress: 100% - TaskInfo: {"partitions":["*"],"exec mem limit":2147483648,"column separator":",","line delimiter":"\n","tablet num":1,"broker":"hdfs","coord num":1,"db":"default_cluster:db1","tbl":"tbl3"} - Path: bos://bj-test-cmy/export/ -CreateTime: 2019-06-25 17:08:24 - StartTime: 2019-06-25 17:08:28 -FinishTime: 2019-06-25 17:08:34 - Timeout: 3600 - ErrorMsg: N/A -``` - - -* JobId: The unique ID of the job -* Label: Job identifier -* State: Job status: - * PENDING: Job to be scheduled - * EXPORTING: Data is being exported - * FINISHED: Job finished successfully - * CANCELLED: Job failed -* Progress: Job progress, measured by query plans. Assuming a job has 10 query plans in total and 3 of them have finished, the progress is 30%. -* TaskInfo: Job information in JSON format: - * db: database name - * tbl: table name - * partitions: the exported partitions; `*` represents all partitions. - * exec mem limit: query plan memory usage limit. Unit: bytes. - * column separator: the column separator of the exported file. - * line delimiter: the line separator of the exported file. - * tablet num: the total number of tablets involved. - * broker: the name of the broker used. - * coord num: the number of query plans. -* Path: Export path on the remote storage. -* CreateTime/StartTime/FinishTime: Creation time, start scheduling time and end time of the job. -* Timeout: Job timeout. The unit is seconds. This time is calculated from CreateTime. -* ErrorMsg: If there is an error in the job, the cause of the error is shown here. - -## Best Practices - -### Splitting Query Plans - -How many query plans need to be executed for an Export job depends on the total number of Tablets and how many Tablets can be allocated for a query plan at most.
Since multiple query plans are executed serially, the execution time of jobs can be reduced if more fragments are processed by one query plan. However, if the query plan fails (e.g., the RPC fails to call Broker, the remote storage jitters, etc.), too many tablets can lead to a higher retry cost of a query plan. Therefore, it is necessary to arrange the number of query plans and the number of fragments to be scanned for each query plan in order to balance the execution time and the success rate of execution. It is generally recommended that the amount of data scanned by a query plan be within 3-5 GB (the size and number of tablets in a table can be viewed with the `SHOW TABLET FROM tbl_name;` statement). - -### exec\_mem\_limit - -Usually, a query plan for an Export job has only two parts, `scan` and `export`, and does not involve computing logic that requires too much memory. So usually the default memory limit of 2GB can satisfy the requirement. But in some scenarios, for example when a query plan needs to scan too many Tablets on the same BE, or when the Tablets have too many data versions, memory may be insufficient. At this point, a larger memory limit needs to be set through this parameter, such as 4 GB, 8 GB, etc. - -## Notes - -* It is not recommended to export a large amount of data at one time. The maximum recommended amount of exported data for one Export job is tens of GB. An overly large export leads to more junk files and higher retry costs. -* If the amount of table data is too large, it is recommended to export it by partition. -* During the operation of the Export job, if the FE restarts or switches Master, the Export job will fail, requiring the user to resubmit it. -* If the Export job fails, the `__doris_export_tmp_xxx` temporary directory generated in the remote storage and the generated files will not be deleted, requiring the user to delete them manually. -* If the Export job runs successfully, the `__doris_export_tmp_xxx` directory generated in the remote storage may be retained or cleared according to the file system semantics of the remote storage. For example, in Baidu Object Storage (BOS), after removing the last file in a directory through a rename operation, the directory will also be deleted. If the directory is not cleared, the user can clear it manually. -* If the FE restarts or switches Master after the Export job has run successfully or failed, some information of the jobs displayed by `SHOW EXPORT` will be lost and cannot be viewed. -* Export jobs only export data from Base tables, not Rollup Index. -* Export jobs scan data and occupy IO resources, which may affect the query latency of the system. - -## Relevant configuration - -### FE - -* `export_checker_interval_second`: Scheduling interval of the Export job scheduler, default is 5 seconds. Setting this parameter requires restarting FE. -* `export_running_job_num_limit`: Limit on the number of running Export jobs. If exceeded, the job will wait and be in PENDING state. The default is 5, which can be adjusted at run time. -* `export_task_default_timeout_second`: Default timeout of an Export job. The default is 2 hours. It can be adjusted at run time. -* `export_tablet_num_per_task`: The maximum number of fragments that a query plan is responsible for. The default is 5.
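-
-Several of these settings are adjustable at run time. A minimal sketch of changing one of them over the MySQL protocol (the value `10` is only an illustration), assuming the same `ADMIN SET FRONTEND CONFIG` syntax used elsewhere in these docs:
-
-```
-ADMIN SET FRONTEND CONFIG ("export_running_job_num_limit" = "10");
-```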
diff --git a/docs/en/administrator-guide/ldap.md b/docs/en/administrator-guide/ldap.md deleted file mode 100644 index ceaebb7c05..0000000000 --- a/docs/en/administrator-guide/ldap.md +++ /dev/null @@ -1,175 +0,0 @@ ---- -{ - "title": "LDAP", - "language": "en" -} ---- - - - -# LDAP - -Access to third-party LDAP services to provide authentication login and group authorization services for Doris. - -LDAP authentication login complements Doris authentication login by accessing the LDAP service for password authentication; Doris uses LDAP to authenticate the user's password first; if the user does not exist in the LDAP service, it continues to use Doris to authenticate the password; if the LDAP password is correct but there is no corresponding account in Doris, a temporary user is created to log in to Doris. - -LDAP group authorization, is to map the group in LDAP to the Role in Doris, if the user belongs to multiple user groups in LDAP, after logging into Doris the user will get the permission of all groups corresponding to the Role, requiring the group name to be the same as the Role name. - -## Noun Interpretation - -* LDAP: Lightweight directory access protocol that enables centralized management of account passwords. -* Privilege: Permissions act on nodes, databases or tables. Different permissions represent different permission to operate. -* Role: Doris can create custom named roles. A role can be thought of as a collection of permissions. - -## Enable LDAP Authentication -### Server-side Configuration - -You need to configure the LDAP basic information in the fe/conf/ldap.conf file, and the LDAP administrator password needs to be set using sql statements. - -#### Configure the fe/conf/ldap.conf file: -* ldap_authentication_enabled = false - Set the value to "true" to enable LDAP authentication; when the value is "false", LDAP authentication is not enabled and all other configuration items of this profile are invalid.Set the value to "true" to enable LDAP authentication; when the value is "false", LDAP authentication is not enabled and all other configuration items of this profile are invalid. - -* ldap_host = 127.0.0.1 - LDAP service ip. - -* ldap_port = 389 - LDAP service port, the default plaintext transfer port is 389, currently Doris' LDAP function only supports plaintext password transfer. - -* ldap_admin_name = cn=admin,dc=domain,dc=com - LDAP administrator account "Distinguished Name". When a user logs into Doris using LDAP authentication, Doris will bind the administrator account to search for user information in LDAP. - -* ldap_user_basedn = ou=people,dc=domain,dc=com - Doris base dn when searching for user information in LDAP. - -* ldap_user_filter = (&(uid={login})) - - For Doris' filtering criteria when searching for user information in LDAP, the placeholder "{login}" will be replaced with the login username. You must ensure that the user searched by this filter is unique, otherwise Doris will not be able to verify the password through LDAP and the error message "ERROR 5081 (42000): user is not unique in LDAP server." will appear when logging in. - - For example, if you use the LDAP user node uid attribute as the username to log into Doris, you can configure it as: - ldap_user_filter = (&(uid={login})); - This item can be configured using the LDAP user mailbox prefix as the user name: - ldap_user_filter = (&(mail={login}@baidu.com)). - -* ldap_group_basedn = ou=group,dc=domain,dc=com - base dn when Doris searches for group information in LDAP. 
If this item is not configured, LDAP group authorization will not be enabled. - -#### Set the LDAP administrator password: -After configuring the ldap.conf file, start FE, log in to Doris with the root or admin account, and execute the SQL: -``` -set ldap_admin_password = 'ldap_admin_password'; -``` - -### Client-side configuration -Client-side LDAP authentication requires the mysql client-side cleartext authentication plugin to be enabled. When logging in to Doris from the command line, the mysql cleartext authentication plugin can be enabled in one of two ways. - -* Set the environment variable LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN to value 1. - For example, in a Linux or macOS environment you can use the command: - ``` - echo "export LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN=1" >> ~/.bash_profile && source ~/.bash_profile - ``` - -* Add the parameter "--enable-cleartext-plugin" each time you log in to Doris. - ``` - mysql -hDORIS_HOST -PDORIS_PORT -u user -p --enable-cleartext-plugin - - Enter ldap password - ``` - -## LDAP authentication detailed explanation -LDAP password authentication and group authorization are complementary to Doris password authentication and authorization. Enabling the LDAP functionality does not completely replace Doris password authentication and authorization; the two coexist. - -### LDAP authentication login details -When LDAP is enabled, the possible combinations of users in Doris and LDAP are as follows: - -|LDAP User|Doris User|Password|Login Status|Login to Doris users| -|--|--|--|--|--| -|Existent|Existent|LDAP Password|Login successful|Doris User| -|Existent|Existent|Doris Password|Login failure|None| -|Non-Existent|Existent|Doris Password|Login successful|Doris User| -|Existent|Non-Existent|LDAP Password|Login successful|LDAP temporary user| - -After LDAP is enabled, when a user logs in using the mysql client, Doris will first verify the user's password through the LDAP service. If the LDAP user exists and the password is correct, Doris uses that user to log in; at this time, if the corresponding Doris account exists, Doris logs in to that account directly, and if it does not exist, a temporary account is created for the user and logged in. The temporary account has the corresponding permissions (see LDAP Group Authorization) and is only valid for the current connection; Doris does not create this user and does not generate any metadata for it. -If no such user exists in the LDAP service, Doris is used for password authentication. - -The following assumes that LDAP authentication is enabled, ldap_user_filter = (&(uid={login})) is configured, all other configuration items are correct, and the client sets the environment variable LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN=1. - -For example: - -#### 1: The account exists in both Doris and LDAP. - -Doris account exists: jack@'172.10.1.10', password: 123456 -The LDAP user node has the attribute uid: jack and the user password: abcdef -The jack@'172.10.1.10' account can be logged in to with the following command: -``` -mysql -hDoris_HOST -PDoris_PORT -ujack -p abcdef -``` - -Login will fail with the following command: -``` -mysql -hDoris_HOST -PDoris_PORT -ujack -p 123456 -``` - -#### 2: The user exists in LDAP and the corresponding account does not exist in Doris.
- -LDAP user node presence attribute: uid: jack User password: abcdef -Use the following command to create a temporary user and log in to jack@'%', the temporary user has basic privileges DatabasePrivs: Select_priv, Doris will delete the temporary user after the user logs out and logs in: -``` -mysql -hDoris_HOST -PDoris_PORT -ujack -p abcdef -``` - -#### 3:LDAP does not exist for the user. - -Doris account exists: jack@'172.10.1.10', password: 123456 -Login to the account using the Doris password, successfully: -``` -mysql -hDoris_HOST -PDoris_PORT -ujack -p 123456 -``` - -### LDAP group authorization details - -If a DLAP user dn is the "member" attribute of an LDAP group node, Doris assumes that the user belongs to the group. Doris will revoke the corresponding role privileges after the user logs out. Before using LDAP group authorization, you should create the corresponding role pairs in Doris and authorize the roles. - -Login user Privileges are related to Doris user and group Privileges, as shown in the following table: -|LDAP Users|Doris Users|Login User Privileges| -|--|--|--| -|exist|exist|LDAP group Privileges + Doris user Privileges| -|Does not exist|Exists|Doris user Privileges| -|exist|non-exist|LDAP group Privileges| - -If the logged-in user is a temporary user and no group permission exists, the user has the select_priv permission of the information_schema by default - -Example: -LDAP user dn is the "member" attribute of the LDAP group node then the user is considered to belong to the group, Doris will intercept the first Rdn of group dn as the group name. -For example, if user dn is "uid=jack,ou=aidp,dc=domain,dc=com", the group information is as follows: -``` -dn: cn=doris_rd,ou=group,dc=domain,dc=com -objectClass: groupOfNames -member: uid=jack,ou=aidp,dc=domain,dc=com -``` -Then the group name is doris_rd. - -If jack also belongs to the LDAP groups doris_qa, doris_pm; Doris exists roles: doris_rd, doris_qa, doris_pm, after logging in using LDAP authentication, the user will not only have the original permissions of the account, but will also get the roles doris_rd, doris_qa and doris _pm privileges. - -## Limitations of LDAP authentication - -* The current LDAP feature of Doris only supports plaintext password authentication, that is, when a user logs in, the password is transmitted in plaintext between client and fe and between fe and LDAP service. -* The current LDAP authentication only supports password authentication under mysql protocol. If you use the Http interface, you cannot use LDAP users for authentication. -* Temporary users do not have user properties. \ No newline at end of file diff --git a/docs/en/administrator-guide/load-data/batch-delete-manual.md b/docs/en/administrator-guide/load-data/batch-delete-manual.md deleted file mode 100644 index 1efc2bcb05..0000000000 --- a/docs/en/administrator-guide/load-data/batch-delete-manual.md +++ /dev/null @@ -1,204 +0,0 @@ ---- -{ - "title": "Batch Delete", - "language": "en" -} ---- - - - -# Batch Delete -Currently, Doris supports multiple import methods such as broker load, routine load, stream load, etc. The data can only be deleted through the delete statement at present. When the delete statement is used to delete, a new data version will be generated every time delete is executed. Frequent deletion will seriously affect the query performance, and when using the delete method to delete, it is achieved by generating an empty rowset to record the deletion conditions. 
Each time you read, you must filter the deletion jump conditions. Also when there are many conditions, Performance affects. Compared with other systems, the implementation of greenplum is more like a traditional database product. Snowflake is implemented through the merge syntax. - -For scenarios similar to the import of cdc data, insert and delete in the data data generally appear interspersed. In this scenario, our current import method is not enough, even if we can separate insert and delete, it can solve the import problem , But still cannot solve the problem of deletion. Use the batch delete function to solve the needs of these scenarios. -There are three ways to merge data import: -1. APPEND: All data are appended to existing data -2. DELETE: delete all rows with the same key column value as the imported data -3. MERGE: APPEND or DELETE according to DELETE ON decision - -## Principle -This is achieved by adding a hidden column `__DORIS_DELETE_SIGN__`, because we are only doing batch deletion on the unique model, so we only need to add a hidden column whose type is bool and the aggregate function is replace. In be, the various aggregation write processes are the same as normal columns, and there are two read schemes: - -Remove `__DORIS_DELETE_SIGN__` when fe encounters extensions such as *, and add the condition of `__DORIS_DELETE_SIGN__ != true` by default -When be reads, a column is added for judgment, and the condition is used to determine whether to delete. - -### Import - -When importing, set the value of the hidden column to the value of the `DELETE ON` expression during fe parsing. The other aggregation behaviors are the same as the replace aggregation column - -### Read - -When reading, add the condition of `__DORIS_DELETE_SIGN__ != true` to all olapScanNodes with hidden columns, be does not perceive this process and executes normally - -### Cumulative Compaction - -In Cumulative Compaction, hidden columns are treated as normal columns, and the compaction logic remains unchanged - -### Base Compaction - -In Base Compaction, delete the rows marked for deletion to reduce the space occupied by data - -### Syntax -The import syntax design is mainly to add a column mapping that specifies the field of the delete mark column, and this column needs to be added to the imported data. The method of setting each import method is as follows - -#### stream load - -The wording of stream load adds a field to set the delete mark column in the columns field in the header. Example -`-H "columns: k1, k2, label_c3" -H "merge_type: [MERGE|APPEND|DELETE]" -H "delete: label_c3=1"` - -#### broker load - -Set the field to delete the mark column at `PROPERTIES` - -``` -LOAD LABEL db1.label1 -( - [MERGE|APPEND|DELETE] DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1") - INTO TABLE tbl1 - COLUMNS TERMINATED BY "," - (tmp_c1,tmp_c2, label_c3) - SET - ( - id=tmp_c2, - name=tmp_c1, - ) - [DELETE ON label=true] - -) -WITH BROKER'broker' -( - "username"="user", - "password"="pass" -) -PROPERTIES -( - "timeout" = "3600" - -); - -``` - -#### routine load - -Routine load adds a mapping to the `columns` field. 
The mapping method is the same as above, the example is as follows - -``` - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - [WITH MERGE|APPEND|DELETE] - COLUMNS(k1, k2, k3, v1, v2, label), - WHERE k1> 100 and k2 like "%doris%" - [DELETE ON label=true] - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,0,200" - ); -``` - -## Enable bulk delete support -There are two ways of enabling batch delete support: -1. By adding `enable_batch_delete_by_default=true` in the fe configuration file, all newly created tables after restarting fe support batch deletion, this option defaults to false - -2. For tables that have not changed the above fe configuration or for existing tables that do not support the bulk delete function, you can use the following statement: -`ALTER TABLE tablename ENABLE FEATURE "BATCH_DELETE"` to enable the batch delete. - -If you want to determine whether a table supports batch delete, you can set a session variable to display the hidden columns `SET show_hidden_columns=true`, and then use `desc tablename`, if there is a `__DORIS_DELETE_SIGN__` column in the output, it is supported, if not, it is not supported -## Note -1. Since import operations other than stream load may be executed out of order inside doris, if it is not stream load when importing using the `MERGE` method, it needs to be used with load sequence. For the specific syntax, please refer to the sequence column related documents -2. `DELETE ON` condition can only be used with MERGE - -## Usage example -Let's take stream load as an example to show how to use it -1. Import data normally: -``` -curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: APPEND" -T ~/table1_data http://127.0.0.1: 8130/api/test/table1/_stream_load -``` -The APPEND condition can be omitted, which has the same effect as the following statement: -``` -curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -T ~/table1_data http://127.0.0.1:8130/api/test/table1 /_stream_load -``` -2. Delete all data with the same key as the imported data -``` -curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: DELETE" -T ~/table1_data http://127.0.0.1: 8130/api/test/table1/_stream_load -``` -Before load: -``` -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 3 | 2 | tom | 2 | -| 4 | 3 | bush | 3 | -| 5 | 3 | helen | 3 | -+--------+----------+----------+------+ -``` -Load data: -``` -3,2,tom,0 -``` -After load: -``` -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 4 | 3 | bush | 3 | -| 5 | 3 | helen | 3 | -+--------+----------+----------+------+ -``` -3. 
Using MERGE, delete the rows whose key columns match imported rows satisfying `siteid=1`, and append the other imported rows -``` -curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: MERGE" -H "delete: siteid=1" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load -``` -Before load: -``` -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 4 | 3 | bush | 3 | -| 5 | 3 | helen | 3 | -| 1 | 1 | jim | 2 | -+--------+----------+----------+------+ -``` -Load data: -``` -2,1,grace,2 -3,2,tom,2 -1,1,jim,2 -``` -After load: -``` -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 4 | 3 | bush | 3 | -| 2 | 1 | grace | 2 | -| 3 | 2 | tom | 2 | -| 5 | 3 | helen | 3 | -+--------+----------+----------+------+ -``` diff --git a/docs/en/administrator-guide/load-data/binlog-load-manual.md b/docs/en/administrator-guide/load-data/binlog-load-manual.md deleted file mode 100644 index 772162d2c2..0000000000 --- a/docs/en/administrator-guide/load-data/binlog-load-manual.md +++ /dev/null @@ -1,523 +0,0 @@ ---- -{ - "title": "Binlog Load", - "language": "en" -} ---- - - - -# Binlog Load - -The Binlog Load feature enables Doris to incrementally synchronize update operations from MySQL, so as to achieve CDC (Change Data Capture) of data in MySQL. - -## Scenarios -* Supports insert / update / delete operations -* Supports filtering data -* Temporarily incompatible with DDL statements - -## Glossary -* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. -* BE: Backend, the backend node of Doris. Responsible for query execution and data storage. -* Canal: Alibaba's open source MySQL binlog parsing tool. Supports incremental data subscription & consumption. -* Batch: A batch of data sent by canal to the client with a globally unique self-incrementing ID. -* SyncJob: A data synchronization job submitted by the user. -* Receiver: Responsible for subscribing to and receiving data from canal. -* Consumer: Responsible for distributing the data received by the Receiver to each channel. -* Channel: The channel that receives the data distributed by the Consumer; it creates tasks for sending data, and controls the beginning, committing and aborting of transactions on one table. -* Task: A task created by a channel, which sends data to BE when executing. - -## Principle -In the phase-one design, Binlog Load relies on canal as an intermediate medium: canal pretends to be a slave node to get and parse the binlog on the MySQL master node, and Doris then gets the parsed data from canal. This process mainly involves MySQL, canal and Doris.
The overall data flow is as follows: - -``` -+---------------------------------------------+ -| Mysql | -+----------------------+----------------------+ - | Binlog -+----------------------v----------------------+ -| Canal Server | -+-------------------+-----^-------------------+ - Get | | Ack -+-------------------|-----|-------------------+ -| FE | | | -| +-----------------|-----|----------------+ | -| | Sync Job | | | | -| | +------------v-----+-----------+ | | -| | | Canal Client | | | -| | | +-----------------------+ | | | -| | | | Receiver | | | | -| | | +-----------------------+ | | | -| | | +-----------------------+ | | | -| | | | Consumer | | | | -| | | +-----------------------+ | | | -| | +------------------------------+ | | -| +----+---------------+--------------+----+ | -| | | | | -| +----v-----+ +-----v----+ +-----v----+ | -| | Channel1 | | Channel2 | | Channel3 | | -| | [Table1] | | [Table2] | | [Table3] | | -| +----+-----+ +-----+----+ +-----+----+ | -| | | | | -| +--|-------+ +---|------+ +---|------+| -| +---v------+| +----v-----+| +----v-----+|| -| +----------+|+ +----------+|+ +----------+|+| -| | Task |+ | Task |+ | Task |+ | -| +----------+ +----------+ +----------+ | -+----------------------+----------------------+ - | | | -+----v-----------------v------------------v---+ -| Coordinator | -| BE | -+----+-----------------+------------------+---+ - | | | -+----v---+ +---v----+ +----v---+ -| BE | | BE | | BE | -+--------+ +--------+ +--------+ - -``` - -As shown in the figure above, the user first submits a SyncJob to the Fe. - -Then, Fe will start a Canal Client for each SyncJob to subscribe to and get data from the Canal Server. - -The Receiver in the Canal Client will receives data by the GET request. Every time a Batch is received, it will be distributed by the Consumer to different Channels according to the corresponding target table. Once a channel received data distributed by Consumer, it will submit a send task for sending data. - -A Send task is a request from Channel to Be, which contains the data of the same Batch distributed to the current channel. - -Channel controls the begin, commit and abort of transaction of single table. In a transaction, the consumer may distribute multiple Batches of data to a channel, so multiple send tasks may be generated. These tasks will not actually take effect until the transaction is committed successfully. - -When certain conditions are met (for example, a certain period of time was passed, reach the maximun data size of commit), the Consumer will block and notify each channel to try commit the transaction. - -If and only if all channels are committed successfully, Canal Server will be notified by the ACK request and Canal Client continue to get and consume data. - -If there are any Channel fails to commit, it will retrieve data from the location where the last consumption was successful and commit again (the Channel that has successfully commited before will not commmit again to ensure the idempotency of commit). - -In the whole cycle of a SyncJob, Canal Client continuously received data from Canal Server and send it to Be through the above process to complete data synchronization. - -## Configure MySQL Server - -In the master-slave synchronization of MySQL Cluster mode, the binary log file (binlog) records all data changes on the master node. Data synchronization and backup among multiple nodes of the cluster should be carried out through binlog logs, so as to improve the availability of the cluster. 
- -The architecture of master-slave synchronization is usually composed of a master node (responsible for writing) and one or more slave nodes (responsible for reading). All data changes on the master node will be copied to the slave nodes. - -**Note that: Currently, you must use MySQL version 5.7 or above to support Binlog Load** - -To enable the binlog of MySQL, you need to edit the my.cnf file and set it like: - -``` -[mysqld] -log-bin = mysql-bin # enable binlog -binlog-format=ROW # use ROW mode -``` - -### Principle Description - -On MySQL, the binlog files are usually named mysql-bin.000001, mysql-bin.000002... And MySQL will automatically segment the binlog file when certain conditions are met: - -1. MySQL is restarted -2. The user enters the `flush logs` command -3. The binlog file size exceeds 1G - -To locate the latest consumption location of the binlog, the binlog file name and position (offset) are needed. - -For instance, each slave node saves the binlog location it has consumed so far, so that it can disconnect, reconnect and continue consumption at any time. - -``` ---------------------- --------------------- -| Slave | read | Master | -| FileName/Position | <<<--------------------------- | Binlog Files | ---------------------- --------------------- -``` - -The master node is only responsible for writing to the binlog. Multiple slave nodes can be connected to a master node at the same time to consume different parts of the binlog without affecting each other. - -The binlog supports two main formats (in addition to the mixed-based mode): - -* Statement-based format: - - Binlog only records the SQL statements executed on the master node, and the slave node copies them to the local node for re-execution. - -* Row-based format: - - Binlog will record the data change information of each row and all columns of the master node, and the slave node will copy and execute the change of each row to the local node. - -The first format only writes the executed SQL statements. Although the log volume will be small, it has the following disadvantages: - -1. The actual data of each row is not recorded -2. The UDF, random and time functions executed on the master node will have inconsistent results on the slave node -3. The execution order of limit statements may be inconsistent - -Therefore, we need to choose the second format, which parses each row of data from the binlog. - -In the row-based format, binlog will record the timestamp, server ID, offset and other information of each binlog event. For instance, the following transaction with two insert statements: - -``` -begin; -insert into canal_test.test_tbl values (3, 300); -insert into canal_test.test_tbl values (4, 400); -commit; -``` - -There will be four binlog events, including one begin event, two insert events and one commit event: - -``` -SET TIMESTAMP=1538238301/*!*/; -BEGIN -/*!*/. -# at 211935643 -# at 211935698 -#180930 0:25:01 server id 1 end_log_pos 211935698 Table_map: 'canal_test'.'test_tbl' mapped to number 25 -#180930 0:25:01 server id 1 end_log_pos 211935744 Write_rows: table-id 25 flags: STMT_END_F -... -'/*!*/; -### INSERT INTO canal_test.test_tbl -### SET -### @1=1 -### @2=100 -# at 211935744 -#180930 0:25:01 server id 1 end_log_pos 211935771 Xid = 2681726641 -...
-'/*!*/; -### INSERT INTO canal_test.test_tbl -### SET -### @1=2 -### @2=200 -# at 211935771 -#180930 0:25:01 server id 1 end_log_pos 211939510 Xid = 2681726641 -COMMIT/*!*/; -``` - -As shown above, each insert event contains modified data. During delete/update, an event can also contain multiple rows of data, making the binlog more compact. - -### Open GTID mode (Optional) - -A global transaction ID (global transaction identifier) identifies a transaction that has been committed on the master node, and is globally unique and valid. After binlog is enabled, the gtid will be written to the binlog file. - -To open the gtid mode of MySQL, you need to edit the my.cnf configuration file and set it like: - -``` -gtid-mode=on // Open gtid mode -enforce-gtid-consistency=1 // Enforce consistency between gtid and transaction -``` - -In gtid mode, the master server can easily track transactions, recover data and replicas without the binlog file name and offset. - -In gtid mode, due to the global validity of gtid, the slave node no longer needs to locate the binlog position on the master node by saving the file name and offset, but can locate it by the data itself. During a SyncJob, the slave node will skip the execution of any gtid transaction already executed before. - -A gtid is expressed as a pair of coordinates: `source_ID` identifies the master node, and `transaction_ID` indicates the order in which this transaction is executed on the master node (max 2^63 - 1). - -``` -GTID = source_id:transaction_id -``` - -For example, the gtid of the 23rd transaction executed on the same master node is: - -``` -3E11FA47-71CA-11E1-9E33-C80AA9429562:23 -``` - -## Configure Canal Server - -Canal is a sub-project of the Alibaba Otter project. Its main purpose is to provide incremental data subscription and consumption based on MySQL binlog parsing, and it was originally used for cross-machine-room synchronization. - -Canal version 1.1.5 and above is recommended. [download link](https://github.com/alibaba/canal/releases) - -After downloading, please follow the steps below to complete the deployment. - -1. Unzip the canal deployer -2. Create a new directory under the conf folder and rename it as the root directory of the instance. The directory name is the destination mentioned later. -3. Modify the instance configuration file (you can copy from `conf/example/instance.properties`) - - ``` - vim conf/{your destination}/instance.properties - ``` - ``` - ## canal instance serverId - canal.instance.mysql.slaveId = 1234 - ## mysql address - canal.instance.master.address = 127.0.0.1:3306 - ## mysql username/password - canal.instance.dbUsername = canal - canal.instance.dbPassword = canal - ``` -4. Start up the canal server - - ``` - sh bin/startup.sh - ``` - -5. Verify that startup succeeded - - ``` - cat logs/{your destination}/{your destination}.log - ``` - ``` - 2013-02-05 22:50:45.636 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [canal.properties] - 2013-02-05 22:50:45.641 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [xxx/instance.properties] - 2013-02-05 22:50:45.803 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start CannalInstance for 1-xxx - 2013-02-05 22:50:45.810 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start successful....
- ``` - -### Principle Description - -By faking its own MySQL dump protocol, canal disguises itself as a slave node, get and parses the binlog of the master node. - -Multiple instances can be started on the canal server. An instance can be regarded as a slave node. Each instance consists of the following parts: - -``` -------------------------------------------------- -| Server | -| -------------------------------------------- | -| | Instance 1 | | -| | ----------- ----------- ----------- | | -| | | Parser | | Sink | | Store | | | -| | ----------- ----------- ----------- | | -| | ----------------------------------- | | -| | | MetaManager | | | -| | ----------------------------------- | | -| -------------------------------------------- | -------------------------------------------------- -``` - -* Parser: Access the data source, simulate the dump protocol, interact with the master, and analyze the protocol -* Sink: Linker between parser and store, for data filtering, processing and distribution -* Store: Data store -* Meta Manager: Metadata management module - -Each instance has its own unique ID in the cluster, that is, server ID. - -In the canal server, the instance is identified by a unique string named destination. The canal client needs destination to connect to the corresponding instance. - -**Note that: canal client and canal instance should correspond to each other one by one** - -Binlog load has forbidded multiple SyncJobs to connect to the same destination. - -The data flow direction in instance is binlog -> Parser -> sink -> store. - -Instance parses binlog logs through the parser module, and the parsed data is cached in the store. When the user submits a SyncJob to Fe, it will start a Canal Client to subscripe and get the data in the store in the corresponding instance. - -The store is actually a ring queue. Users can configure its length and storage space by themselves. - -![store](/images/canal_store.png) - -Store manages the data in the queue through three pointers: - -1. Get pointer: the GET pointer points to the last location get by the Canal Client. -2. Ack pointer: the ACK pointer points to the location of the last successful consumption. -3. Put pointer: the PUT pointer points to the location where the sink module successfully wrote to the store at last. - -``` -canal client asynchronously get data in the store - - get 0 get 1 get 2 put - | | | ...... | - v v v v ---------------------------------------------------------------------- store ring queue - ^ ^ - | | - ack 0 ack 1 -``` - -When the Canal Client calls the Get command, the Canal Server will generate data batches and send them to the Canal Client, and move the Get pointer to the right. The Canal Client can get multiple batches until the Get pointer catches up with the Put pointer. - -When the consumption of data is successful, the Canal Client will return Ack + Batch ID, notify that the consumption has been successful, and move the Ack pointer to the right. The store will delete the data of this batch from the ring queue, make room to get data from the upstream sink module, and then move the Put pointer to the right. - -When the data consumption fails, the client will return a rollback notification of the consumption failure, and the store will reset the Get pointer to the left to the Ack pointer's position, so that the next data get by the Canal Client can start from the Ack pointer again. - -Like the slave node in mysql, Canal Server also needs to save the latest consumption location of the client. 
All metadata in Canal Server (such as gtid and binlog location) is managed by the metamanager. At present, these metadata is persisted in the meta.dat file in the instance's root directory in JSON format by default. - -## Basic Operation - -### Configure Target Table Properties - -User needs to first create the target table which is corresponding to the MySQL side. - -Binlog Load can only support unique target tables from now, and the batch delete feature of the target table must be activated. - -Example: - -``` --- create target table -CREATE TABLE `test1` ( - `a` int(11) NOT NULL COMMENT "", - `b` int(11) NOT NULL COMMENT "" -) ENGINE=OLAP -UNIQUE KEY(`a`) -COMMENT "OLAP" -DISTRIBUTED BY HASH(`a`) BUCKETS 8; - --- enable batch delete -ALTER TABLE canal_test.test1 ENABLE FEATURE "BATCH_DELETE"; -``` - -### Create SyncJob - -The detailed syntax of creating a SyncJob can be viewd in `help create sync job` command. Here we mainly introduce the precautions when creating a SyncJob. - -* job_name - - `job_Name` is the unique identifier of the SyncJob in the current database. With a specified job name, only one SyncJob can be running at the same time. - -* channel_desc - - `column_Mapping` mainly refers to the mapping relationship between the columns of the MySQL source table and the Doris target table. - - If it is not specified, the columns of the source table and the target table will consider correspond one by one in order. - - However, we still recommend explicitly specifying the mapping relationship of columns, so that when the schema-change of the target table (such as adding a nullable column), data synchronization can still be carried out. - - Otherwise, when the schema-change occur, because the column mapping relationship is no longer one-to-one, the SyncJob will report an error. - -* binlog_desc - - `binlog_desc` defines some necessary information for docking the remote binlog address. - - At present, the only supported docking type is the canal type. In canal type, all configuration items need to be prefixed with the canal prefix. - - 1. canal.server.ip: the address of the canal server - 2. canal.server.port: the port of canal server - 3. canal.destination: the identifier of the instance - 4. canal.batchSize: the maximum batch size get from the canal server for each batch. Default 8192 - 5. canal.username: the username of instance - 6. canal.password: the password of instance - 7. canal.debug: when set to true, the details message of each batch and each row will be printed, which may affect the performance. - -### Show Job Status - -Specific commands and examples for showing job status can be found in `help show sync job;` command. - -The parameters in the result set have the following meanings: - -* State - - The current stage of the job. The transition between job states is shown in the following figure: - - ``` - +-------------+ - create job | PENDING | resume job - +-----------+ <-------------+ - | +-------------+ | - +----v-------+ +-------+----+ - | RUNNING | pause job | PAUSED | - | +-----------------------> | - +----+-------+ run error +-------+----+ - | +-------------+ | - | | CANCELLED | | - +-----------> <-------------+ - stop job +-------------+ stop job - system error - ``` - - After the SyncJob is submitted, the status is pending. - - After the Fe scheduler starts the canal client, the status becomes running. - - User can control the status of the job by three commands: `stop/pause/resume`. After the operation, the job status is `cancelled/paused/running` respectively. 
- - There is only one final stage of the job, Cancelled. When the job status changes to Canceled, it cannot be resumed again. - - When an error occurs during SyncJob is running, if the error is unrecoverable, the status will change to cancelled, otherwise it will change to paused. - -* Channel - - The mapping relationship between all source tables and target tables of the job. - -* Status - - The latest consumption location of the current binlog (if the gtid mode is on, the gtid will be displayed), and the delay time of the Doris side compared with the MySQL side. - -* JobConfig - - The remote server information of the docking, such as the address of the Canal Server and the destination of the connected instance. - -### Control Operation - -Users can control the status of jobs through `stop/pause/resume` commands. - -You can use `HELP STOP SYNC JOB;`, `HELP PAUSE SYNC JOB`; And `HELP RESUME SYNC JOB;` commands to view help and examples. - -## Related Parameters - -### Canal configuration - -* `canal.ip` - - canal server's ip address - -* `canal.port` - - canal server's port - -* `canal.instance.memory.buffer.size` - - The queue length of the store ring queue, must be set to the power of 2, the default length is 16384. This value is equal to the maximum number of events that can be cached on the canal side and directly determines the maximum number of events that can be accommodated in a transaction on the Doris side. It is recommended to make it large enough to prevent the upper limit of the amount of data that can be accommodated in a transaction on the Doris side from being too small, resulting in too frequent transaction submission and data version accumulation. - -* `canal.instance.memory.buffer.memunit` - - The default space occupied by an event at the canal end, default value is 1024 bytes. This value multiplied by `canal.instance.memory.buffer.size` is equal to the maximum space of the store. For example, if the queue length of the store is 16384, the space of the store is 16MB. However, the actual size of an event is not actually equal to this value, but is determined by the number of rows of data in the event and the length of each row of data. For example, the insert event of a table with only two columns is only 30 bytes, but the delete event may reach thousands of bytes. This is because the number of rows of delete event is usually more than that of insert event. - - -### Fe configuration - -The following configuration belongs to the system level configuration of SyncJob. The configuration value can be modified in configuration file fe.conf. - -* `enable_create_sync_job` - - Turn on the Binlog Load feature. The default value is false. This feature is turned off. - -* `sync_commit_interval_second` - - Maximum interval time between commit transactions. If there is still data in the channel that has not been committed after this time, the consumer will notify the channel to commit the transaction. - -* `min_sync_commit_size` - - The minimum number of events required to commit a transaction. If the number of events received by Fe is less than it, Fe will continue to wait for the next batch of data until the time exceeds `sync_commit_interval_second`. The default value is 10000 events. If you want to modify this configuration, please ensure that this value is less than the `canal.instance.memory.buffer.size` configuration on the canal side (16384 by default). 
Otherwise, Fe will try to get more events than the length of the store queue without ack, causing the store queue to block until timeout. - -* `min_bytes_sync_commit` - - The minimum data size required to commit a transaction. If the data size received by Fe is smaller than it, it will continue to wait for the next batch of data until the time exceeds `sync_commit_interval_second`. The default value is 15MB. If you want to modify this configuration, please ensure that this value is less than the product `canal.instance.memory.buffer.size` and `canal.instance.memory.buffer.memunit` on the canal side (16MB by default). Otherwise, Fe will try to get data from canal larger than the store space without ack, causing the store queue to block until timeout. - -* `max_bytes_sync_commit` - - The maximum size of the data when the transaction is committed. If the data size received by Fe is larger than it, it will immediately commit the transaction and send the accumulated data. The default value is 64MB. If you want to modify this configuration, please ensure that this value is greater than the product of `canal.instance.memory.buffer.size` and `canal.instance.memory.buffer.mmemunit` on the canal side (16MB by default) and `min_bytes_sync_commit`. - -* `max_sync_task_threads_num` - - The maximum number of threads in the SyncJobs' thread pool. There is only one thread pool in the whole Fe for synchronization, which is used to process the tasks created by all SyncJobs in the Fe. - -## FAQ - -1. Will modifying the table structure affect data synchronization? - - Yes. The SyncJob cannot prohibit `alter table` operation. -When the table's schema changes, if the column mapping cannot match, the job may be suspended incorrectly. It is recommended to reduce such problems by explicitly specifying the column mapping relationship in the data synchronization job, or by adding nullable columns or columns with default values. - -2. Will the SyncJob continue to run after the database is deleted? - - No. In this case, the SyncJob will be checked by the Fe's scheduler thread and be stopped. - -3. Can multiple SyncJobs be configured with the same `IP:Port + destination`? - - No. When creating a SyncJob, FE will check whether the `IP:Port + destination` is duplicate with the existing job to prevent multiple jobs from connecting to the same instance. - -4. Why is the precision of floating-point type different between MySQL and Doris during data synchronization? - - The precision of Doris floating-point type is different from that of MySQL. You can choose to use decimal type instead. \ No newline at end of file diff --git a/docs/en/administrator-guide/load-data/broker-load-manual.md b/docs/en/administrator-guide/load-data/broker-load-manual.md deleted file mode 100644 index 72a1976f2f..0000000000 --- a/docs/en/administrator-guide/load-data/broker-load-manual.md +++ /dev/null @@ -1,536 +0,0 @@ ---- -{ - "title": "Broker Load", - "language": "en" -} ---- - - - -# Broker Load - -Broker load is an asynchronous import method, and the data source supported depends on the data source supported by the Broker process. - -Users need to create Broker load imports through MySQL protocol and check the import results by viewing the import commands. - -## Applicable scenarios - -* Source data in Broker accessible storage systems, such as HDFS. -* Data volumes range from tens to hundreds of GB. - -## Noun Interpretation - -1. Frontend (FE): Metadata and scheduling nodes of Doris system. 
In the import process, it is mainly responsible for the generation of import plan and the scheduling of import tasks. -2. Backend (BE): The computing and storage nodes of Doris system. In the import process, it is mainly responsible for ETL and storage of data. -3. Broker: Broker is an independent stateless process. It encapsulates the file system interface and provides Doris with the ability to read files in the remote storage system. -4. Plan: Import the execution plan, and BE executes the import execution plan to import data into Doris system. - -## Basic Principles - -After the user submits the import task, FE generates the corresponding plan and distributes the plan to several BEs according to the number of BEs and the size of the file. Each BE performs part of the import data. - -BE pulls data from Broker and imports it into the system after transforming the data. All BEs complete the import, and the FE decides whether the import is successful or not. - -``` - + - | 1. user create broker load - v - +----+----+ - | | - | FE | - | | - +----+----+ - | - | 2. BE etl and load the data - +--------------------------+ - | | | -+---v---+ +--v----+ +---v---+ -| | | | | | -| BE | | BE | | BE | -| | | | | | -+---+-^-+ +---+-^-+ +--+-^--+ - | | | | | | - | | | | | | 3. pull data from broker -+---v-+-+ +---v-+-+ +--v-+--+ -| | | | | | -|Broker | |Broker | |Broker | -| | | | | | -+---+-^-+ +---+-^-+ +---+-^-+ - | | | | | | -+---v-+-----------v-+----------v-+-+ -| HDFS/BOS/AFS cluster | -| | -+----------------------------------+ - -``` - -## Basic operations - -### Create a load - -Broker load create a data load job - -Grammar: - -``` -LOAD LABEL db_name.label_name -(data_desc, ...) -WITH BROKER broker_name broker_properties -[PROPERTIES (key1=value1, ... )] - -* data_desc: - - DATA INFILE ('file_path', ...) - [NEGATIVE] - INTO TABLE tbl_name - [PARTITION (p1, p2)] - [COLUMNS TERMINATED BY separator ] - [(col1, ...)] - [PRECEDING FILTER predicate] - [SET (k1=f1(xx), k2=f2(xx))] - [WHERE predicate] - -* broker_properties: - - (key1=value1, ...) -``` -Examples: - -``` -LOAD LABEL db1.label1 -( - DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1") - INTO TABLE tbl1 - COLUMNS TERMINATED BY "," - (tmp_c1,tmp_c2) - SET - ( - id=tmp_c2, - name=tmp_c1) - ), - DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2") - INTO TABLE tbl2 - COLUMNS TERMINATED BY "," - (col1, col2) - where col1 > 1 -) -WITH BROKER 'broker' -( - "username"="user", - "password"="pass" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -Create the imported detailed grammar execution ``HELP BROKER LOAD `` View grammar help. This paper mainly introduces the parametric meaning and points for attention in Broker load's creation import grammar. - -#### Label - -Identity of import task. Each import task has a unique Label within a single database. Label is a user-defined name in the import command. With this Label, users can view the execution of the corresponding import task. - -Another function of Label is to prevent users from repeatedly importing the same data. **It is strongly recommended that users use the same label for the same batch of data. Thus, repeated requests for the same batch of data can only be accepted once, guaranteeing at-Most-One semantics** - -When the corresponding import job status of Label is CANCELLED, it can be used again to submit the import job. 
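As a small, hedged illustration (the database name `db1` and label `label1` below are placeholders), the state of a previously used label can be checked before deciding whether the label may be submitted again:

```
-- Look up the job that used this label; only a CANCELLED state frees the label for reuse.
SHOW LOAD FROM db1 WHERE LABEL = "label1";

-- If the State column shows CANCELLED, the same LOAD LABEL db1.label1 ... statement may be resubmitted.
```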
- -#### Data Description Class Parameters - -Data description class parameters mainly refer to the parameters belonging to ``data_desc`` in Broker load creating import statements. Each group of ```data_desc``` mainly describes the data source address, ETL function, target table and partition information involved in this import. - -The following is a detailed explanation of some parameters of the data description class: - -+ Multi-table import - - Broker load supports a single import task involving multiple tables, and each Broker load import task can implement multiple tables import by declaring multiple tables in multiple ``data_desc``. Each individual ```data_desc``` can also specify the data source address belonging to the table. Broker load guarantees atomic success or failure between multiple tables imported at a single time. - -+ negative - - ```data_desc``` can also set up data fetching and anti-importing. This function is mainly used when aggregated columns in data tables are of SUM type. If you want to revoke a batch of imported data. The `negative` parameter can be used as a batch of data. Doris automatically retrieves this batch of data on aggregated columns to eliminate the same batch of data. - -+ partition - - In `data_desc`, you can specify the partition information of the table to be imported, but it will not be imported if the data to be imported does not belong to the specified partition. At the same time, data that does not specify a Partition is considered error data. - -+ preceding filter predicate - - Used to filter original data. The original data is the data without column mapping and transformation. The user can filter the data before conversion, select the desired data, and then perform the conversion. - -+ where predicate - - The where statement in ```data_desc``` is responsible for filtering the data that has been transformed. The unselected rows which is filtered by where predicate will not be calculated in ```max_filter_ratio``` . If there are more than one where predicate of the same table , the multi where predicate will be merged from different ```data_desc``` and the policy is AND. - -+ merge\_type - The type of data merging supports three types: APPEND, DELETE, and MERGE. APPEND is the default value, which means that all this batch of data needs to be appended to the existing data. DELETE means to delete all rows with the same key as this batch of data. MERGE semantics Need to be used in conjunction with the delete condition, which means that the data that meets the delete condition is processed according to DELETE semantics and the rest is processed according to APPEND semantics - - -#### Import job parameters - -Import job parameters mainly refer to the parameters in Broker load creating import statement that belong to ``opt_properties``. Import operation parameters act on the whole import operation. - -The following is a detailed explanation of some parameters of the import operation parameters: - -+ time out - - The time-out of the import job (in seconds) allows the user to set the time-out of each import by himself in ``opt_properties``. If the import task is not completed within the set timeout time, it will be cancelled by the system and become CANCELLED. The default import timeout for Broker load is 4 hours. - - Usually, the user does not need to manually set the timeout of the import task. When the import cannot be completed within the default timeout time, the task timeout can be set manually. 
- - > Recommended timeout - > - > Total file size (MB) / slowest import speed (MB/s) > timeout > (Total file size (MB) * Number of tables and related Rollup tables to be imported) / (10 * Number of concurrent imports) - - > The concurrency of imports can be found in the system configuration section at the end of this document. The 10 in the formula is the current default import speed limit of 10 MB/s. - - > For example, for 1 GB of data to be imported into a table with three Rollup tables, with a current import concurrency of 3, the minimum value of timeout is ```(1 * 1024 * 3) / (10 * 3) = 102 seconds.``` - - Because the machine environment and the concurrent query load of each Doris cluster are different, the slowest import speed of a Doris cluster needs to be estimated by the user from the history of past import tasks. - -+ max\_filter\_ratio - - The maximum tolerated error rate of the import task, 0 by default, with a value range of 0-1. When the import error rate exceeds this value, the import fails. - - If the user wishes to ignore erroneous rows, the import can succeed by setting this parameter to a value greater than 0. - - The calculation formula is as follows: - - ``` (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) > max_filter_ratio ``` - - ``` dpp.abnorm.ALL``` denotes the number of rows whose data quality is not up to standard, such as type mismatch, column count mismatch, length mismatch and so on. - - ``` dpp.norm.ALL ``` refers to the number of rows of correct data in the import process. The amount of correct data for the import task can be queried with the ``SHOW LOAD`` command. - - Number of rows in the original file = `dpp.abnorm.ALL + dpp.norm.ALL` - -+ exec\_mem\_limit - - Memory limit of the load job. The default is 2 GB, in bytes. - -+ strict\_mode - - Broker load can use strict mode. Use ```properties ("strict_mode" = "true")``` to enable it; the default is false. - - Strict mode means that column type conversion during the import is strictly filtered. The strict filtering strategy is as follows: - - 1. For column type conversion, if strict mode is true, erroneous data will be filtered. Erroneous data here refers to values that are not null in the source but become null after column type conversion. - - 2. Strict mode does not affect an imported column when it is generated by a function transformation. - - 3. For an imported column whose type includes range restrictions, strict mode does not affect it if the original value passes type conversion but falls outside the declared range. For example, if the type is decimal(1,0) and the original value is 10, the value passes type conversion but exceeds the column declaration; strict mode has no effect on such data.
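To tie the job parameters above together, here is a hedged sketch of a Broker load statement that sets the timeout, error tolerance, memory limit and strict mode in one place (the path, table and broker names are placeholders, and the values are only examples):

```
LOAD LABEL db1.label_params_demo
(
    DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1")
    INTO TABLE tbl1
    COLUMNS TERMINATED BY ","
)
WITH BROKER 'broker'
(
    "username" = "user",
    "password" = "pass"
)
PROPERTIES
(
    "timeout" = "3600",              -- job timeout in seconds
    "max_filter_ratio" = "0.1",      -- tolerate up to 10% filtered rows
    "exec_mem_limit" = "2147483648", -- 2GB memory limit, in bytes
    "strict_mode" = "true"           -- filter rows whose non-null values convert to null
);
```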
- -#### Import Relation between strict mode source data - -Here's an example of a column type TinyInt - -> Note: When columns in a table allow null values to be imported - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|---------| -|null | \N | N/A | true or false | NULL| -|not null | aaa or 2000 | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 | 1 | true or false | correct data| - -Here's an example of column type Decimal (1,0) - -> Note: When columns in a table allow null values to be imported - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|--------| -|null | \N | N/A | true or false | NULL| -|not null | aaa | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 or 10 | 1 | true or false | correct data| - -> Note: Although 10 is a value beyond the range, strict mode does not affect it because its type meets the requirements of decimal. 10 will eventually be filtered in other ETL processes. But it will not be filtered by strict mode. - -### View load - -Broker load import mode is asynchronous, so the user must create the imported Label record and use Label in the **view Import command to view the import result**. View import commands are common in all import modes. The specific syntax can be `HELP SHOW LOAD`. - -Examples: - -``` -mysql> show load order by createtime desc limit 1\G -*************************** 1. row *************************** - JobId: 76391 - Label: label1 - State: FINISHED - Progress: ETL:100%; LOAD:100% - Type: BROKER - EtlInfo: dpp.abnorm.ALL=15; dpp.norm.ALL=28133376 - TaskInfo: cluster:N/A; timeout(s):10800; max_filter_ratio:5.0E-5 - ErrorMsg: N/A - CreateTime: 2019-07-27 11:46:42 - EtlStartTime: 2019-07-27 11:46:44 - EtlFinishTime: 2019-07-27 11:46:44 - LoadStartTime: 2019-07-27 11:46:44 -LoadFinishTime: 2019-07-27 11:50:16 - URL: http://192.168.1.1:8040/api/_load_error_log?file=__shard_4/error_log_insert_stmt_4bb00753932c491a-a6da6e2725415317_4bb00753932c491a_a6da6e2725415317 - JobDetails: {"Unfinished backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"ScannedRows":2390016,"TaskNumber":1,"All backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"FileNumber":1,"FileSize":1073741824} -``` - -The following is mainly about the significance of viewing the parameters in the return result set of the import command: - -+ JobId - - The unique ID of the import task is different for each import task, which is automatically generated by the system. Unlike Label, JobId will never be the same, while Label can be reused after the import task fails. - -+ Label - - Identity of import task. - -+ State - - Import the current phase of the task. In the Broker load import process, PENDING and LOADING are the two main import states. If the Broker load is in the PENDING state, it indicates that the current import task is waiting to be executed; the LOADING state indicates that it is executing. - - There are two final stages of the import task: CANCELLED and FINISHED. When Load job is in these two stages, the import is completed. CANCELLED is the import failure, FINISHED is the import success. - -+ Progress - - Import the progress description of the task. There are two kinds of progress: ETL and LOAD, which correspond to the two stages of the import process, ETL and LOADING. 
At present, Broker load only has the LOADING stage, so ETL will always be displayed as `100%`. - - The progress range of LOAD is 0-100%. - - ``` LOAD Progress = Number of tables currently completed / Number of tables designed for this import task * 100%``` - - **If all import tables complete the import, then the progress of LOAD is 99%** import enters the final effective stage, and the progress of LOAD will only be changed to 100% after the entire import is completed. - - Import progress is not linear. So if there is no change in progress over a period of time, it does not mean that the import is not being implemented. - -+ Type - - Types of import tasks. The type value of Broker load is only BROKER. -+ EtlInfo - - It mainly shows the imported data quantity indicators `unselected.rows`, `dpp.norm.ALL` and `dpp.abnorm.ALL`. The first value shows the rows which has been filtered by where predicate. Users can verify that the error rate of the current import task exceeds max\_filter\_ratio based on these two indicators. - -+ TaskInfo - - It mainly shows the current import task parameters, that is, the user-specified import task parameters when creating the Broker load import task, including `cluster`, `timeout`, and `max_filter_ratio`. - -+ ErrorMsg - - When the import task status is CANCELLED, the reason for the failure is displayed in two parts: type and msg. If the import task succeeds, the `N/A` is displayed. - - The value meaning of type: - - ``` - USER_CANCEL: User Canceled Tasks - ETL_RUN_FAIL: Import tasks that failed in the ETL phase - ETL_QUALITY_UNSATISFIED: Data quality is not up to standard, that is, the error rate exceedsmax_filter_ratio - LOAD_RUN_FAIL: Import tasks that failed in the LOADING phase - TIMEOUT: Import task not completed in overtime - UNKNOWN: Unknown import error - ``` - -+ CreateTime /EtlStartTime /EtlFinishTime /LoadStartTime /LoadFinishTime - - These values represent the creation time of the import, the beginning time of the ETL phase, the completion time of the ETL phase, the beginning time of the Loading phase and the completion time of the entire import task, respectively. - - Broker load import has no ETL stage, so its EtlStartTime, EtlFinishTime, LoadStartTime are set to the same value. - - Import tasks stay in CreateTime for a long time, while LoadStartTime is N/A, which indicates that import tasks are heavily stacked at present. Users can reduce the frequency of import submissions. - - ``` - LoadFinishTime - CreateTime = Time consumed by the entire import task - LoadFinishTime - LoadStartTime = The entire Broker load import task execution time = the time consumed by the entire import task - the time the import task waits - ``` - -+ URL - - The error data sample of the import task can be obtained by accessing the URL address. When there is no error data in this import, the URL field is N/A. - -+ JobDetails - - Display some details of the running status of the job. Including file number, total file size(Bytes), num of sub tasks, scanned rows, related backend ids and unfinished backend ids. - - ``` - {"Unfinished backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"ScannedRows":2390016,"TaskNumber":1,"All backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"FileNumber":1,"FileSize":1073741824} - ``` - - This info will be updated every 5 seconds. the ScannedRows only for displaying the job progress, not indicate the real numbers. 
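For routine monitoring, the same command can be filtered further. A non-authoritative example (adjust the database name and label as needed):

```
-- List the most recent jobs that are still loading
SHOW LOAD FROM db1 WHERE STATE = "LOADING" ORDER BY CreateTime DESC LIMIT 10;

-- Inspect a single job by label to read Progress, ErrorMsg, URL and JobDetails
SHOW LOAD FROM db1 WHERE LABEL = "label1"\G
```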
- -### Cancel load - -When a Broker load job status is not CANCELLED or FINISHED, it can be cancelled manually by the user. When cancelling, you need to specify the Label of the import task to be cancelled. The syntax of the cancel command can be viewed with `HELP CANCEL LOAD`. - -## Relevant System Configuration - -### FE configuration - -The following configurations belong to the system-level configuration of Broker load and act on all Broker load import tasks. Configuration values are adjusted mainly by modifying `fe.conf`. - -+ min\_bytes\_per\_broker\_scanner/max\_bytes\_per\_broker\_scanner/max\_broker\_concurrency - - The first two configurations limit the minimum and maximum amount of data processed by a single BE. The third configuration limits the maximum number of concurrent imports of a job. The minimum amount of data processed, the maximum concurrency, the size of the source files and the number of BEs in the current cluster **together determine the concurrency of this import**. - - ``` - Concurrency of this import = Min(source file size / min_bytes_per_broker_scanner, max_broker_concurrency, current number of BE nodes) - Amount of data processed by a single BE for this import = source file size / concurrency of this import - ``` - - Usually the maximum amount of data supported by an import job is `max_bytes_per_broker_scanner * number of BE nodes`. If you need to import a larger amount of data, you need to increase the `max_bytes_per_broker_scanner` parameter appropriately. - -Default configuration: - -``` -Parameter name: min_bytes_per_broker_scanner, default 64MB, unit bytes. -Parameter name: max_broker_concurrency, default 10. -Parameter name: max_bytes_per_broker_scanner, default 3GB, unit bytes. -``` - -## Best Practices - -### Application scenarios - -The most suitable scenario for Broker load is when the raw data is in a file system (HDFS, BOS, AFS). Secondly, since Broker load is the only asynchronous method among the single-shot import methods, users who need to import large files asynchronously can also consider Broker load. - -### Data volume - -We will only discuss the case of a single BE. If the user cluster has more than one BE, the data volumes in the headings below should be multiplied by the number of BEs. For example, if the user has three BEs, then the value below 3G (inclusive) should be multiplied by 3, that is, below 9G (inclusive). - -+ Below 3G (inclusive) - - Users can submit the Broker load import request directly. - -+ Over 3G - - Since the maximum amount of data processed by a single BE is 3G by default, files over 3G can only be imported by adjusting Broker load's import parameters. - - 1. Modify the maximum scan amount of a single BE and the maximum concurrency according to the current number of BEs and the size of the original file. - - ``` - Modify the configuration in fe.conf - - max_broker_concurrency = number of BEs - Amount of data processed by a single BE for the current import task = original file size / max_broker_concurrency - max_bytes_per_broker_scanner >= amount of data processed by a single BE for the current import task - - For example, for a 100G file with 10 BEs in the cluster - max_broker_concurrency = 10 - max_bytes_per_broker_scanner >= 10G = 100G / 10 - - ``` - - After modification, all BEs process the import task concurrently, and each BE processes part of the original file.
- - *Note: The configurations in both FEs are system configurations, that is to say, their modifications work on all Broker load tasks.* - - 2. Customize the timeout time of the current import task when creating the import - - ``` - Current import task single BE processing data volume / user Doris cluster slowest import speed (MB/s) >= current import task timeout time >= current import task single BE processing data volume / 10M/s - - For example, a 100G file with 10 BEs in the cluster - Timeout > 1000s = 10G / 10M /s - - ``` - - 3. When the user finds that the timeout time calculated in the second step exceeds the default maximum time-out time for importing the system by 4 hours. - - At this time, it is not recommended that users directly increase the maximum time-out to solve the problem. If the single import time exceeds the default maximum import timeout of 4 hours, it is better to solve the problem by splitting the file to be imported and importing it several times. The main reason is that if a single import exceeds 4 hours, the time cost of retry after import failure is very high. - - The maximum amount of imported file data expected by the Doris cluster can be calculated by the following formula: - - ``` - Expected maximum imported file data = 14400s * 10M / s * BE number - For example, the BE number of clusters is 10. - Expected maximum imported file data volume = 14400 * 10M / s * 10 = 1440000M ≈ 1440G - - Note: The average user's environment may not reach the speed of 10M/s, so it is recommended that more than 500G files be split and imported. - - ``` - -### Job Scheduling - -The system limits the number of Broker Load jobs running in a cluster to prevent too many Load jobs from running at the same time. - -First, the configuration parameter of FE: `desired_max_waiting_jobs` will limit the number of Broker Load jobs that are pending or running (the job status is PENDING or LOADING) in a cluster. The default is 100. If this threshold is exceeded, the newly submitted job will be rejected directly. - -A Broker Load job will be divided into pending task and loading task phases. Among them, the pending task is responsible for obtaining the information of the imported file, and the loading task will be sent to BE to perform specific import tasks. - -The configuration parameter `async_pending_load_task_pool_size` of FE is used to limit the number of pending tasks running at the same time. It is also equivalent to controlling the number of import tasks that are actually running. This parameter defaults to 10. In other words, assuming that the user submits 100 Load jobs, only 10 jobs will enter the LOADING state and start execution, while other jobs are in the PENDING waiting state. - -The FE configuration parameter `async_loading_load_task_pool_size` is used to limit the number of loading tasks that run at the same time. A Broker Load job will have 1 pending task and multiple loading tasks (equal to the number of DATA INFILE clauses in the LOAD statement). So `async_loading_load_task_pool_size` should be greater than or equal to `async_pending_load_task_pool_size`. - -Because the work of pending tasks is relatively lightweight (for example, just accessing hdfs to obtain file information), `async_pending_load_task_pool_size` does not need to be large, and the default 10 is usually sufficient. And `async_loading_load_task_pool_size` is really used to limit the import tasks that can be run at the same time. It can be adjusted appropriately according to the cluster size. 
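As a hedged sketch of how these limits can be inspected (the parameter names are the ones discussed above; whether a given item can be changed at runtime depends on the Doris version):

```
-- Check the current values of the job scheduling limits
ADMIN SHOW FRONTEND CONFIG LIKE "desired_max_waiting_jobs";
ADMIN SHOW FRONTEND CONFIG LIKE "async_pending_load_task_pool_size";
ADMIN SHOW FRONTEND CONFIG LIKE "async_loading_load_task_pool_size";

-- Thread-pool sizes are normally changed in fe.conf (e.g. async_loading_load_task_pool_size = 20,
-- an illustrative value) and take effect after the FE is restarted.
```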
- -### Performance analysis - -You can execute `set enable_profile=true` to open the load job profile before submitting the import job. After the import job is completed, you can view the profile of the import job in the `Queris` tab of the FE web page. - -This profile can help analyze the running status of the import job. - -Currently, the profile can only be viewed after the job is successfully executed. - -### Complete examples - -Data situation: User data in HDFS, file address is hdfs://abc.com:8888/store_sales, HDFS authentication user name is root, password is password, data size is about 30G, hope to import into database bj_sales table store_sales. - -Cluster situation: The number of BEs in the cluster is about 3, and the Broker name is broker. - -+ Step 1: After the calculation of the above method, the single BE import quantity is 10G, then the configuration of FE needs to be modified first, and the maximum amount of single BE import is changed to: - - ``` - max_bytes_per_broker_scanner = 10737418240 - - ``` - -+ Step 2: Calculated, the import time is about 1000s, which does not exceed the default timeout time. No custom timeout time for import can be configured. - -+ Step 3: Create import statements - - ``` - LOAD LABEL bj_sales.store_sales_broker_load_01 - ( - DATA INFILE("hdfs://abc.com:8888/store_sales") - INTO TABLE store_sales - ) - WITH BROKER 'broker' - ("username"="root", "password"="password"); - ``` - -## Common Questions - -* failed with: `Scan bytes per broker scanner exceed limit:xxx` - - Refer to the Best Practices section of the document to modify the FE configuration items `max_bytes_per_broker_scanner` and `max_broker_concurrency'.` - -* failed with: `failed to send batch` or `TabletWriter add batch with unknown id` - - Refer to **General System Configuration** in **BE Configuration** in the Import Manual (./load-manual.md), and modify `query_timeout` and `streaming_load_rpc_max_alive_time_sec` appropriately. - -* failed with: `LOAD_RUN_FAIL; msg: Invalid Column Name: xxx` - - If it is PARQUET or ORC format data, you need to keep the column names in the file header consistent with the column names in the doris table, such as: - ``` - (tmp_c1, tmp_c2) - SET - ( - id = tmp_c2, - name = tmp_c1 - ) - ``` - Represents getting the column with (tmp_c1, tmp_c2) as the column name in parquet or orc, which is mapped to the (id, name) column in the doris table. If set is not set, the column names in the column are used as the mapping relationship. - - Note: If the orc file directly generated by some hive versions is used, the table header in the orc file is not the column name in the hive meta, but (_col0, _col1, _col2, ...), which may cause the Invalid Column Name error, then You need to use set for mapping. diff --git a/docs/en/administrator-guide/load-data/delete-manual.md b/docs/en/administrator-guide/load-data/delete-manual.md deleted file mode 100644 index fc0302bb7b..0000000000 --- a/docs/en/administrator-guide/load-data/delete-manual.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -{ - "title": "Delete", - "language": "en" -} ---- - - - -# Delete - -Unlike other import methods, delete is a synchronization process. Similar to insert into, all delete operations are an independent import job in Doris. Generally, delete statements need to specify tables, partitions and delete conditions to tell which data to be deleted, and the data on base index and rollup index will be deleted at the same time. 
- - -## Syntax - -The delete statement's syntax is as follows: - -``` -DELETE FROM table_name [PARTITION partition_name] -WHERE -column_name1 op value[ AND column_name2 op value ...]; -``` - -example 1: - -``` -DELETE FROM my_table PARTITION p1 WHERE k1 = 3; -``` - -example 2: - -``` -DELETE FROM my_table PARTITION p1 WHERE k1 < 3 AND k2 = "abc"; -``` - -The following describes the parameters used in the delete statement: - -* PARTITION - - The target partition of the delete statement. If not specified, the table must be a single partition table, otherwise it cannot be deleted - -* WHERE - - The condition of the delete statement. All delete statements must specify a where condition. - -Explanation: - -1. The type of `OP` in the WHERE condition can only include `=, >, <, >=, <=, !=, in, not in`. -2. The column in the WHERE condition can only be the `key` column. -3. Cannot delete when the `key` column does not exist in any rollup table. -4. Each condition in WHERE condition can only be connected by `and`. If you want `or`, you are suggested to write these conditions into two delete statements. -5. If the specified table is a range or list partitioned table, `PARTITION` must be specified unless the table is a single partition table,. -6. Unlike the insert into command, delete statement cannot specify `label` manually. You can view the concept of `label` in [Insert Into](./insert-into-manual.md) - -## Delete Result - -The delete command is an SQL command, and the returned results are synchronous. It can be divided into the following types: - -1. Successful visible - - If delete completes successfully and is visible, the following results will be returned, `query OK` indicates success. - - ``` - mysql> delete from test_tbl PARTITION p1 where k1 = 1; - Query OK, 0 rows affected (0.04 sec) - {'label':'delete_e7830c72-eb14-4cb9-bbb6-eebd4511d251', 'status':'VISIBLE', 'txnId':'4005'} - ``` - -2. Submitted successfully, but not visible - - - The transaction submission of Doris is divided into two steps: submission and publish version. Only after the publish version step is completed, the result will be visible to the user. If it has been submitted successfully, then it can be considered that the publish version step will eventually success. Doris will try to wait for publishing for a period of time after submitting. If it has timed out, even if the publishing version has not been completed, it will return to the user in priority and prompt the user that the submission has been completed but not visible. If delete has been committed and executed, but has not been published and visible, the following results will be returned. - - ``` - mysql> delete from test_tbl PARTITION p1 where k1 = 1; - Query OK, 0 rows affected (0.04 sec) - {'label':'delete_e7830c72-eb14-4cb9-bbb6-eebd4511d251', 'status':'COMMITTED', 'txnId':'4005', 'err':'delete job is committed but may be taking effect later' } - ``` - - The result will return a JSON string at the same time: - - `affected rows`: Indicates the row affected by this deletion. Since the deletion of Doris is currently a logical deletion, the value is always 0. - - `label`: The label generated automatically to be the signature of the delete jobs. Each job has a unique label within a single database. - - `status`: Indicates whether the data deletion is visible. If it is visible, `visible` will be displayed. If it is not visible, `committed` will be displayed. 
- - - `txnId`: The transaction ID corresponding to the delete job - - `err`: This field displays some details of the deletion - -3. Commit failed, transaction cancelled - - If the delete statement is not submitted successfully, it will be automatically aborted by Doris and the following result will be returned - - - ``` - mysql> delete from test_tbl partition p1 where k1 > 80; - ERROR 1064 (HY000): errCode = 2, detailMessage = {error reason} - ``` - - Example: - - A timed-out deletion returns the timeout and the unfinished replicas, displayed as `tablet_id=replica_id` - - - ``` - mysql> delete from test_tbl partition p1 where k1 > 80; - ERROR 1064 (HY000): errCode = 2, detailMessage = failed to delete replicas from job: 4005, Unfinished replicas:10000=60000, 10001=60000, 10002=60000 - ``` - - **The correct processing logic for the returned results of the delete operation is as follows:** - - 1. If `Error 1064 (HY000)` is returned, the deletion failed. - - 2. If the returned result is `Query OK`, the deletion was executed successfully. - - 1. If `status` is `committed`, the deletion has been committed and will eventually become visible. Users can wait for a while and then use the `show delete` command to view the result. - 2. If `status` is `visible`, the data has been deleted successfully. - -## Relevant Configuration - -### FE configuration - -**TIMEOUT configuration** - -In general, Doris's deletion timeout is limited to between 30 seconds and 5 minutes. The specific time can be adjusted through the following configuration items: - -* `tablet_delete_timeout_second` - - The timeout of a deletion scales elastically with the number of tablets in the specified partition. This configuration represents the average timeout contributed by one tablet. The default value is 2. - - Assuming that the specified partition of this deletion has 5 tablets, the timeout available for the deletion is 10 seconds. Because this is lower than the minimum timeout of 30 seconds, the final timeout is 30 seconds. - -* `load_straggler_wait_second` - - If the user expects the amount of data to be so large that the 5-minute upper limit is insufficient, the upper limit of the timeout can be adjusted through this item. The default value is 300. - - **The specific calculation rule of timeout (seconds)** - - `TIMEOUT = MIN(load_straggler_wait_second, MAX(30, tablet_delete_timeout_second * tablet_num))` - -* `query_timeout` - - Because delete itself is an SQL command, the delete statement is also limited by session variables, in particular by the session value `query_timeout`. You can increase the value with `set query_timeout = xxx` (in seconds). - -**InPredicate configuration** - -* `max_allowed_in_element_num_of_delete` - - If many elements are needed in an `in` predicate, this item adjusts the upper limit on the number of elements allowed in `in`. The default value is 1024. - -## Show delete history - -1. Users can view historically completed delete jobs through the show delete statement.
- - Syntax - - ``` - SHOW DELETE [FROM db_name] - ``` - - example - - ``` - mysql> show delete from test_db; - +-----------+---------------+---------------------+-----------------+----------+ - | TableName | PartitionName | CreateTime | DeleteCondition | State | - +-----------+---------------+---------------------+-----------------+----------+ - | empty_tbl | p3 | 2020-04-15 23:09:35 | k1 EQ "1" | FINISHED | - | test_tbl | p4 | 2020-04-15 23:09:53 | k1 GT "80" | FINISHED | - +-----------+---------------+---------------------+-----------------+----------+ - 2 rows in set (0.00 sec) - ``` - diff --git a/docs/en/administrator-guide/load-data/insert-into-manual.md b/docs/en/administrator-guide/load-data/insert-into-manual.md deleted file mode 100644 index bdc85f439a..0000000000 --- a/docs/en/administrator-guide/load-data/insert-into-manual.md +++ /dev/null @@ -1,297 +0,0 @@ ---- -{ - "title": "Insert Into", - "language": "en" -} ---- - - - -# Insert Into - -The use of Insert Into statements is similar to that of Insert Into statements in databases such as MySQL. But in Doris, all data writing is a separate import job. So Insert Into is also introduced here as an import method. - -The main Insert Into command contains the following two kinds; - -* INSERT INTO tbl SELECT ... -* INSERT INTO tbl (col1, col2, ...) VALUES (1, 2, ...), (1,3, ...); - -The second command is for Demo only, not in a test or production environment. - -## Basic operations - -### Create a Load - -The Insert Into command needs to be submitted through MySQL protocol. Creating an import request returns the import result synchronously. - -Grammar: - -``` -INSERT INTO table_name [partition_info] [WITH LABEL label] [col_list] [query_stmt] [VALUES]; -``` - -Examples: - -``` -INSERT INTO tbl2 WITH LABEL label1 SELECT * FROM tbl3; -INSERT INTO tbl1 VALUES ("qweasdzxcqweasdzxc"), ("a"); -``` - -**Notice** - -When using `CTE(Common Table Expressions)` as the query part of insert operation, the `WITH LABEL` or column list part must be specified. -For example: - -``` -INSERT INTO tbl1 WITH LABEL label1 -WITH cte1 AS (SELECT * FROM tbl1), cte2 AS (SELECT * FROM tbl2) -SELECT k1 FROM cte1 JOIN cte2 WHERE cte1.k1 = 1; - -INSERT INTO tbl1 (k1) -WITH cte1 AS (SELECT * FROM tbl1), cte2 AS (SELECT * FROM tbl2) -SELECT k1 FROM cte1 JOIN cte2 WHERE cte1.k1 = 1; -``` - -The following is a brief introduction to the parameters used in creating import statements: - -+ partition\_info - - Import the target partition of the table. If the target partition is specified, only the data that matches the target partition will be imported. If not specified, the default value is all partitions of the table. - -+ col\_list - - The target column of the import table can exist in any order. If no target column is specified, the default value is all columns in this table. If a column in the table does not exist in the target column, the column needs a default value, otherwise Insert Into will fail. - - If the result column type of the query statement is inconsistent with the type of the target column, an implicit type conversion is invoked. If the conversion is not possible, the Insert Into statement will report a parsing error. - -+ query\_stmt - - Through a query statement, the results of the query statement are imported into other tables in Doris system. Query statements support any SQL query syntax supported by Doris. - -+ VALUES - - Users can insert one or more data through VALUES grammar. 
- - *Note: VALUES is only suitable for importing several pieces of data as DEMO. It is totally unsuitable for any test and production environment. Doris system itself is not suitable for single data import scenarios. It is recommended to use INSERT INTO SELECT for batch import.* - -* WITH LABEL - - INSERT as a load job, it can also be with a label. If not with a label, Doris will use a UUID as label. - - This feature needs Doris version 0.11+. - - *Note: It is recommended that Label be specified rather than automatically allocated by the system. If the system allocates automatically, but during the execution of the Insert Into statement, the connection is disconnected due to network errors, etc., then it is impossible to know whether Insert Into is successful. If you specify Label, you can view the task results again through Label.* - -### Load results - -Insert Into itself is a SQL command, and the return result is divided into the following types according to the different execution results: - -1. Result set is empty - - If the result set of the insert corresponding SELECT statement is empty, it is returned as follows: - - ``` - mysql> insert into tbl1 select * from empty_tbl; - Query OK, 0 rows affected (0.02 sec) - ``` - - `Query OK` indicates successful execution. `0 rows affected` means that no data was loaded. - -2. The result set is not empty - - In the case where the result set is not empty. The returned results are divided into the following situations: - - 1. Insert is successful and data is visible: - - ``` - mysql> insert into tbl1 select * from tbl2; - Query OK, 4 rows affected (0.38 sec) - {'label': 'insert_8510c568-9eda-4173-9e36-6adc7d35291c', 'status': 'visible', 'txnId': '4005'} - - mysql> insert into tbl1 with label my_label1 select * from tbl2; - Query OK, 4 rows affected (0.38 sec) - {'label': 'my_label1', 'status': 'visible', 'txnId': '4005'} - - mysql> insert into tbl1 select * from tbl2; - Query OK, 2 rows affected, 2 warnings (0.31 sec) - {'label': 'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status': 'visible', 'txnId': '4005'} - - mysql> insert into tbl1 select * from tbl2; - Query OK, 2 rows affected, 2 warnings (0.31 sec) - {'label': 'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status': 'committed', 'txnId': '4005'} - ``` - - `Query OK` indicates successful execution. `4 rows affected` means that a total of 4 rows of data were imported. `2 warnings` indicates the number of lines to be filtered. - - Also returns a json string: - - ``` - {'label': 'my_label1', 'status': 'visible', 'txnId': '4005'} - {'label': 'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status': 'committed', 'txnId': '4005'} - {'label': 'my_label1', 'status': 'visible', 'txnId': '4005', 'err': 'some other error'} - ``` - - `label` is a user-specified label or an automatically generated label. Label is the ID of this Insert Into load job. Each load job has a label that is unique within a single database. - - `status` indicates whether the loaded data is visible. If visible, show `visible`, if not, show` committed`. - - `txnId` is the id of the load transaction corresponding to this insert. - - The `err` field displays some other unexpected errors. - - When user need to view the filtered rows, the user can use the following statement - - ``` - show load where label = "xxx"; - ``` - - The URL in the returned result can be used to query the wrong data. For details, see the following **View Error Lines** Summary. 
-     - **"Data is not visible" is a temporary status, this batch of data must be visible eventually** - - You can view the visible status of this batch of data with the following statement: - - ``` - show transaction where id = 4005; - ``` - - If the `TransactionStatus` column in the returned result is `visible`, the data is visible. - - 2. Insert fails - - Execution failure indicates that no data was successfully loaded, and returns as follows: - - ``` - mysql> insert into tbl1 select * from tbl2 where k1 = "a"; - ERROR 1064 (HY000): all partitions have no load data. Url: http://10.74.167.16:8042/api/_load_error_log?file=__shard_2/error_log_insert_stmt_ba8bb9e158e4879-ae8de8507c0bf8a2_ba8bb9e158e4879_ae8de850e8de850 - ``` - - Where `ERROR 1064 (HY000): all partitions have no load data` shows the reason for the failure. The latter url can be used to query the wrong data. For details, see the following **View Error Lines** Summary. - -**In summary, the correct processing logic for the results returned by the insert operation should be:** - -1. If the returned result is `ERROR 1064 (HY000)`, it means that the import failed. -2. If the returned result is `Query OK`, it means the execution was successful. - - 1. If `rows affected` is 0, the result set is empty and no data is loaded. - 2. If `rows affected` is greater than 0: - 1. If `status` is` committed`, the data is not yet visible. You need to check the status through the `show transaction` statement until `visible`. - 2. If `status` is` visible`, the data is loaded successfully. - 3. If `warnings` is greater than 0, it means that some data is filtered. You can get the url through the `show load` statement to see the filtered rows. - -### SHOW LAST INSERT - -In the previous section, we described how to follow up on the results of insert operations. However, it is difficult to get the json string of the returned result in some mysql libraries. Therefore, Doris also provides the `SHOW LAST INSERT` command to explicitly retrieve the results of the last insert operation. - -After executing an insert operation, you can execute `SHOW LAST INSERT` on the same session connection. This command returns the result of the most recent insert operation, e.g. - -``` -mysql> show last insert\G -*************************** 1. row *************************** - TransactionId: 64067 - Label: insert_ba8f33aea9544866-8ed77e2844d0cc9b - Database: default_cluster:db1 - Table: t1 -TransactionStatus: VISIBLE - LoadedRows: 2 - FilteredRows: 0 -``` - -This command returns the insert results and the details of the corresponding transaction. Therefore, you can continue to execute the `show last insert` command after each insert operation to get the insert results. - -> Note: This command will only return the results of the last insert operation within the same session connection. If the connection is broken or replaced with a new one, the empty set will be returned. - -## Relevant System Configuration - -### FE configuration - -+ time out - - The timeout time of the import task (in seconds) will be cancelled by the system if the import task is not completed within the set timeout time, and will become CANCELLED. - - At present, Insert Into does not support custom import timeout time. All Insert Into imports have a uniform timeout time. The default timeout time is 1 hour. If the imported source file cannot complete the import within the specified time, the parameter ``insert_load_default_timeout_second`` of FE needs to be adjusted. 
- - At the same time, the Insert Into statement is also subject to the session variable `query_timeout`. You can increase the timeout with `SET query_timeout = xxx;`, in seconds. - -### Session Variables - -+ enable\_insert\_strict - - The Insert Into import itself cannot control the tolerable error rate of the import. Users can only control it through the session parameter `enable_insert_strict`. When this parameter is set to false, the statement returns success as long as at least one row of data was imported correctly. When this parameter is set to true, the import fails if there is any erroneous data. The default is false. It can be set by `SET enable_insert_strict = true;`. - -+ query\_timeout - - Insert Into itself is also an SQL command, so the Insert Into statement is also restricted by the session variable `query_timeout`. You can increase the timeout with `SET query_timeout = xxx;`, in seconds. - -## Best Practices - -### Application scenarios -1. Users want to import only a few rows of test data to verify the functionality of the Doris system. The INSERT INTO VALUES syntax is suitable in this case. -2. Users want to perform ETL on data that is already in a Doris table and import the result into a new Doris table. The INSERT INTO SELECT syntax is suitable in this case. -3. Users can create an external table, such as a MySQL external table mapping a table in a MySQL system, or a Broker external table mapping data files on HDFS. The data from the external table is then imported into a Doris table for storage through the INSERT INTO SELECT syntax. - -### Data volume -Insert Into has no limitation on the amount of data and can also support large data imports. However, Insert Into has a default timeout, so if the estimated amount of imported data is too large, the system's Insert Into import timeout needs to be modified. - -``` -Import data volume = 36G or less ≈ 3600s * 10M/s -Here, 10M/s is the maximum import speed limit. Users need to calculate the average import speed according to the current cluster situation and use it to replace the 10M/s in the formula. -``` - -### Complete examples - -Users have a table `store_sales` in the database `sales` and have created a table called `bj_store_sales` in the same database. They want to import the data recorded in `store_sales` into the new table `bj_store_sales`. The amount of data to import is about 10G. - -``` -store_sales schema: -(id, total, user_id, sale_timestamp, region) - -bj_store_sales schema: -(id, total, user_id, sale_timestamp) - -``` - -Cluster situation: The average import speed of the current user cluster is about 5M/s - -+ Step1: Determine whether the default Insert Into timeout needs to be modified - - ``` - Calculate the approximate import time - 10G / 5M/s = 2000s - - Modify the FE configuration - insert_load_default_timeout_second = 2000 - ``` - -+ Step2: Create the import task - - Since the user wants to ETL data from a source table and import it into the target table, the Insert Into with query\_stmt form should be used for the import. - - ``` - INSERT INTO bj_store_sales SELECT id, total, user_id, sale_timestamp FROM store_sales where region = "bj"; - ``` - -## Common Questions - -* View the wrong line - - Because Insert Into cannot control the error rate, erroneous data can only be tolerated or ignored entirely through `enable_insert_strict`. So if `enable_insert_strict` is set to true, Insert Into may fail. If `enable_insert_strict` is set to false, then possibly only part of the qualified data is imported.
However, in either case, Doris is currently unable to provide the ability to view substandard data rows. Therefore, the user cannot view the specific import error through the Insert Into statement. - - The causes of errors are usually: source data column length exceeds destination data column length, column type mismatch, partition mismatch, column order mismatch, etc. When it's still impossible to check for problems. At present, it is only recommended that the SELECT command in the Insert Into statement be run to export the data to a file, and then import the file through Stream load to see the specific errors. diff --git a/docs/en/administrator-guide/load-data/load-json-format.md b/docs/en/administrator-guide/load-data/load-json-format.md deleted file mode 100644 index 39a82aa239..0000000000 --- a/docs/en/administrator-guide/load-data/load-json-format.md +++ /dev/null @@ -1,467 +0,0 @@ ---- -{ - "title": "Load Json Format Data", - "language": "en" -} ---- - - - -# Load Json Format Data - -Doris supports data load in Json format since version 0.12. - -## Supported Load Methods - -Currently only the following load methods support data import in Json format: - -* Stream Load -* Routine Load - -For specific instructions on the above load methods, please refer to the relevant documentation. This document mainly introduces the instructions for using Json in these load methods. - -## Supported Json Format - -Currently, only the following two Json formats are supported: - -1. Multi-line data represented by Array - - Json format with Array as the root node. Each element in the Array represents a row of data to be loaded, usually an Object. Examples are as follows: - - ``` - [ - { "id": 123, "city" : "beijing"}, - { "id": 456, "city" : "shanghai"}, - ... - ] - ``` - - ``` - [ - { "id": 123, "city" : { "name" : "beijing", "region" : "haidian"}}, - { "id": 456, "city" : { "name" : "beijing", "region" : "chaoyang"}}, - ... - ] - ``` - - This method is usually used for the Stream Load method to represent multiple rows of data in a batch of load data. - - This method must be used in conjunction with setting `stripe_outer_array=true`. Doris will expand the array when parsing, and then parse each Object in turn as a row of data. - -2. Single row of data represented by Object - - Json format with Object as the root node. The entire Object represents a row of data to be loaded. Examples are as follows: - - ``` - { "id": 123, "city" : "beijing"} - ``` - - ``` - { "id": 123, "city" : { "name" : "beijing", "region" : "haidian" }} - ``` - - This method is usually used for the Routine Load method, such as representing a message in Kafka, that is, a row of data. - -## Json Path - -Doris supports extracting the data specified in Json through Json Path. - -**Note: Because for Array type data, Doris will first expand the array, and finally perform single-line processing according to the Object format. Therefore, the examples after this document will be illustrated with Json data in single Object format.** - -* Json Path is not specified - - If Json Path is not specified, Doris will use the column names in the table to find the elements in Object by default. Examples are as follows: - - The table contains two columns: `id`, `city` - - Json data is as follows: - - ``` - { "id": 123, "city" : "beijing"} - ``` - - Then Doris will use `id`, `city` to match, and get the final data `123` and `beijing`. 
- - If the Json data is as follows: - - ``` - { "id": 123, "name" : "beijing"} - ``` - - Then use `id`, `city` to match and get the final data `123` and `null`. - -* Json Path is specified - - Specify a set of Json Path in the form of a Json data. Each element in the array represents a column to be extracted. Examples are as follows: - - ``` - ["$.id", "$.name"] - ``` - ``` - ["$.id.sub_id", "$.name[0]", "$.city[0]"] - ``` - - Doris will use the specified Json Path for data matching and extraction. - -* Match non-primitive types - - The values that the previous example finally matched are all primitive types, such as Integer, String, and so on. Doris currently does not support complex types, such as Array, Map, etc. So when a non-primitive type is matched, Doris will convert the type to a Json format string and load it as a string type. Examples are as follows: - - ``` - { "id": 123, "city" : { "name" : "beijing", "region" : "haidian" }} - ``` - - The Json Path is `["$.city"]`. Then the matched elements are: - - ``` - { "name" : "beijing", "region" : "haidian" } - ``` - - This element will be converted into a string for subsequent load operations: - - ``` - "{'name':'beijing','region':'haidian'}" - ``` - -* Match failed - - When the match fails, `null` will be returned. Examples are as follows: - - Json data is: - - ``` - { "id": 123, "name" : "beijing"} - ``` - - The Json Path is `["$.id", "$.info"]`. Then the matched elements are `123` and `null`. - - Doris currently does not distinguish between the null value represented in the Json data and the null value generated when the match fails. Suppose the Json data is: - - ``` - { "id": 123, "name" : null } - ``` - - Then use the following two Json Path will get the same result: `123` and `null`. - - ``` - ["$.id", "$.name"] - ``` - ``` - ["$.id", "$.info"] - ``` - -* Complete match failed - - In order to prevent misoperation caused by some parameter setting errors. When Doris tries to match a row of data, if all columns fail to match, it will be considered a error row. Suppose the Json data is: - - ``` - { "id": 123, "city" : "beijing" } - ``` - - If Json Path is incorrectly written as (or when Json Path is not specified, the columns in the table do not contain `id` and `city`): - - ``` - ["$.ad", "$.infa"] - ``` - - Will result in a complete match failure, the line will be marked as an error row, instead of producing `null, null`. - -## Json Path and Columns - -Json Path is used to specify how to extract data in JSON format, and Columns specify the mapping and conversion relationship of columns. The two can be used together. - -In other words, it is equivalent to using Json Path to rearrange the data in a Json format according to the column order specified in Json Path. After that, you can use Columns to map the rearranged source data to the columns of the table. Examples are as follows: - -Data content: - -``` -{"k1": 1, "k2": 2} -``` - -Table schema: - -`k2 int, k1 int` - -Load statement 1 (take Stream Load as an example): - -``` -curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -T example.json http:/ /127.0.0.1:8030/api/db1/tbl1/_stream_load -``` - -In Load statement 1, only Json Path is specified, and Columns are not specified. The role of Json Path is to extract the Json data in the order of the fields in the Json Path, and then write it in the order of the table schema. 
The final loaded data results are as follows:

```
+------+------+
| k1   | k2   |
+------+------+
| 2    | 1    |
+------+------+
```

You will see that the k1 column actually loaded the value of the "k2" field in the Json data. This is because the field name in Json is not equivalent to the field name in the table schema. We need to explicitly specify the mapping relationship between the two.

Load statement 2:

```
curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -H "columns: k2, k1" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
```

Compared with load statement 1, the Columns field is added here to describe the mapping relationship of the columns, in the order of `k2, k1`. That is, after extracting in the order of the fields in the Json Path, the first column is used as the value of the k2 column in the table, and the second column as the value of the k1 column in the table. The final loaded data results are as follows:

```
+------+------+
| k1   | k2   |
+------+------+
| 1    | 2    |
+------+------+
```

Of course, as with other load methods, you can perform column conversion operations in Columns. An example is as follows:

```
curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -H "columns: k2, tmp_k1, k1 = tmp_k1 * 100" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
```

The above example will multiply the value of k1 by 100 before importing it. The final imported data results are as follows:

```
+------+------+
| k1   | k2   |
+------+------+
| 100  | 2    |
+------+------+
```

## NULL and Default value

The sample data is as follows:

```
[
    {"k1": 1, "k2": "a"},
    {"k1": 2},
    {"k1": 3, "k2": "c"}
]
```

The table schema is: `k1 int null, k2 varchar(32) null default "x"`

The load statement is as follows:

```
curl -v --location-trusted -u root: -H "format: json" -H "strip_outer_array: true" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
```

The import result that users may expect is as follows, that is, the default value is filled in for missing columns.

```
+------+------+
| k1   | k2   |
+------+------+
| 1    | a    |
+------+------+
| 2    | x    |
+------+------+
| 3    | c    |
+------+------+
```

But the actual load result is as follows, that is, NULL is filled in for missing columns.

```
+------+------+
| k1   | k2   |
+------+------+
| 1    | a    |
+------+------+
| 2    | NULL |
+------+------+
| 3    | c    |
+------+------+
```

This is because, from the information in the load statement, Doris does not know that "the missing column is the k2 column in the table".
If you want to load the above data as expected, the load statement is as follows:

```
curl -v --location-trusted -u root: -H "format: json" -H "strip_outer_array: true" -H "jsonpaths: [\"$.k1\", \"$.k2\"]" -H "columns: k1, tmp_k2, k2 = ifnull(tmp_k2,'x')" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
```

## LargeInt and Decimal

Doris supports data types such as largeint and decimal, which have a larger value range and higher precision. However, because the RapidJSON library used by Doris can only parse numeric types up to Int64 and double, importing largeint or decimal values in JSON format may cause problems such as loss of precision or data conversion errors.
For example:

```
[
    {"k1": 1, "k2":9999999999999.999999 }
]
```

The type of the imported k2 column is `Decimal(16, 9)`, and the data to be imported is `9999999999999.999999`. During the JSON load, the double conversion loses precision and the imported value becomes `10000000000000.0002`, which causes an import error.

To solve this problem, Doris provides the parameter `num_as_string`. When it is set, Doris converts numeric types to strings while parsing the JSON data, so the load does not lose precision.

```
curl -v --location-trusted -u root: -H "format: json" -H "num_as_string: true" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load
```

But using this parameter has side effects. Doris currently does not support composite types, such as Array, Map, etc., so when a non-primitive type is matched, Doris converts it to a string in JSON format. `num_as_string` will also convert the numbers inside such compound types into strings, for example:

JSON data:

    { "id": 123, "city" : { "name" : "beijing", "city_id" : 1 }}

Without `num_as_string`, the data of the city column is:

`{ "name" : "beijing", "city_id" : 1 }`

With `num_as_string`, the data of the city column is:

`{ "name" : "beijing", "city_id" : "1" }`

Note that this parameter causes the numeric `city_id` inside the compound type to be treated as a string and quoted, which differs from the original data.

Therefore, when using JSON load, try to avoid importing largeint, decimal, and composite types at the same time. If this cannot be avoided, make sure you fully understand the **side effects**.

## Examples

### Stream Load

Because the Json format cannot be split, when using Stream Load to load a Json format file, the entire file content is loaded into memory before processing. Therefore, if the file is too large, it may occupy a lot of memory.

Suppose the table structure is:

```
id INT NOT NULL,
city VARCHAR NULL,
code INT NULL
```

1. Load single-line data 1

    ```
    {"id": 100, "city": "beijing", "code" : 1}
    ```

    * Do not specify Json Path

        ```
        curl --location-trusted -u user:passwd -H "format: json" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
        ```

        Results:

        ```
        100 beijing 1
        ```

    * Specify Json Path

        ```
        curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
        ```

        Results:

        ```
        100 beijing 1
        ```

2. Load single-line data 2

    ```
    {"id": 100, "content": {"city": "beijing", "code" : 1}}
    ```

    * Specify Json Path

        ```
        curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.content.city\",\"$.content.code\"]" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
        ```

        Results:

        ```
        100 beijing 1
        ```

3. Load multi-line data

    ```
    [
        {"id": 100, "city": "beijing", "code" : 1},
        {"id": 101, "city": "shanghai"},
        {"id": 102, "city": "tianjin", "code" : 3},
        {"id": 103, "city": "chongqing", "code" : 4},
        {"id": 104, "city": ["zhejiang", "guangzhou"], "code" : 5},
        {
            "id": 105,
            "city": {
                "order1": ["guangzhou"]
            },
            "code" : 6
        }
    ]
    ```

    * Specify Json Path

        ```
        curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -H "strip_outer_array: true" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
        ```

        Results:

        ```
        100 beijing 1
        101 shanghai NULL
        102 tianjin 3
        103 chongqing 4
        104 ["zhejiang","guangzhou"] 5
        105 {"order1":["guangzhou"]} 6
        ```

4. Convert load data

    The data is still the multi-row data from example 3. Now we need to add 1 to the `code` column of the loaded data before importing it.

    ```
    curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -H "strip_outer_array: true" -H "columns: id, city, tmpc, code=tmpc+1" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load
    ```

    Results:

    ```
    100 beijing 2
    101 shanghai NULL
    102 tianjin 4
    103 chongqing 5
    104 ["zhejiang","guangzhou"] 6
    105 {"order1":["guangzhou"]} 7
    ```

### Routine Load

Routine Load processes Json data in the same way as Stream Load, so it is not repeated here.

For the Kafka data source, the content of each message is treated as a complete piece of Json data. If a message contains multiple rows of data expressed in Array format, all of those rows are loaded, but Kafka's offset only increases by 1. If an Array-format Json represents multiple rows of data but fails to parse because of a Json format error, the error row count only increases by 1 (since parsing failed, Doris cannot determine how many rows the message actually contains, so it can only record one error row).

diff --git a/docs/en/administrator-guide/load-data/load-manual.md b/docs/en/administrator-guide/load-data/load-manual.md
deleted file mode 100644
index 8d813a75f5..0000000000
--- a/docs/en/administrator-guide/load-data/load-manual.md
+++ /dev/null
@@ -1,228 +0,0 @@
---
{
    "title": "Introduction Overview",
    "language": "en"
}
---

# Introduction Overview

The Load function imports the user's raw data into Doris. After a successful import, users can query the data through the MySQL client.

Doris supports multiple import methods. It is recommended to read this document in full first, and then consult the detailed documentation of the chosen import method.

## Basic concepts

1. Frontend (FE): The metadata and scheduling node of the Doris system. In the import process, it is mainly responsible for generating the import plan and scheduling the import tasks.
2. Backend (BE): The computing and storage node of the Doris system. In the import process, it is mainly responsible for the ETL and storage of data.
3. Broker: Broker is an independent, stateless process. It encapsulates the file system interface and provides Doris with the ability to read files in remote storage systems.
4. Load job: The import job reads the source data submitted by the user, transforms or cleans it, and imports the data into the Doris system. After the import is completed, the data can be queried by the user.
5. Label: All import jobs have a Label.
Label is unique in a database and can be specified by the user or automatically generated by the system to identify an import job. The same Label can only be used for a successful import job. -6. MySQL Protocol/HTTP Protocol: Doris provides two kinds of access protocol interfaces. MySQL protocol and HTTP protocol. Part of the import mode uses MySQL protocol interface to submit jobs, and part of the import mode uses HTTP protocol interface to submit jobs. - -## Load mode - -To adapt to different data import requirements, Doris system provides 6 different import methods. Each import mode supports different data sources and has different usage modes (asynchronous, synchronous). - -All import methods support CSV data format. Broker load also supports parquet and orc data format. - -For instructions on each import mode, please refer to the operation manual for a single import mode. - -* Broker load - - Access and read external data sources (such as HDFS) through the Broker process and import them into Doris. The user submits the import job through Mysql protocol and executes it asynchronously. View the import results through the `SHOW LOAD` command. - -* Stream load - - Users submit requests through HTTP protocol and create imports with raw data. It is mainly used to quickly import data from local files or data streams into Doris. The Import command returns the import result synchronously. - -* Insert - - Similar to the Insert statement in MySQL, Doris provides `INSERT INTO tbl SELECT ...;`reading data from Doris's table and importing it into another table. Or by `INSERT INTO tbl VALUES (...);` Insert a single piece of data. - -* Multi load - - Users submit multiple import jobs through HTTP protocol. Multi Load guarantees the atomic validity of multiple import jobs. - -* Routine load - - Users submit routine import jobs through MySQL protocol, generate a resident thread, read and import data from data sources (such as Kafka) uninterruptedly into Doris. - -* Load through S3 protocol - - Users directly load data through the S3 protocol, and the usage is similar to Broker Load - -## Basic Principles - -### Import execution process - - -``` -+---------+ +---------+ +----------+ +-----------+ -| | | | | | | | -| PENDING +----->+ ETL +----->+ LOADING +----->+ FINISHED | -| | | | | | | | -+---------+ +---+-----+ +----+-----+ +-----------+ - | | | - | | | - | | | - | | | +-----------+ - | | | | | - +---------------+-----------------+------------> CANCELLED | - | | - +-----------+ - -``` - -As shown above, an import operation mainly goes through the four stages above. - -+ PENDING (not required): Only Broker Load has this stage. Broker Load is submitted by the user and stays at this stage for a short time until it is scheduled by Scheduler in FE. Scheduler's schedule interval is 5 seconds. - -+ ETL (not required): This stage exists before version 0.10.0 (included), mainly for transforming raw data according to user declaration and filtering raw data that does not meet the requirements. In the version after 0.10.0, the ETL phase no longer exists, and the work of data transformation is merged into the LOADING phase. - -+ LOADING: This stage is mainly used to push the transformed data into the corresponding BE storage before version 0.10.0 (including). In the version after 0.10.0, the data is cleaned and changed first, and then sent to BE storage. When all imported data are imported, the process of waiting for validity enters, and Load job is still LOADING. 
- -+ FINISHED: After all the data involved in Load Job takes effect, the state of Load Job becomes FINISHED. Data imported after FINISHED can be queried. - -+ CANCELLED: Before job FINISH, jobs may be cancelled and entered the CANCELLED state. For example, the user manually cancels, or imports errors. CANCELLED is also the final state of Load Job and cannot be executed again. - -In the above stage, except for the PENDING to LOADING stage, which is scheduled by Scheduler, the transfer before other stages is implemented by callback mechanism. - -### Label and Atomicity - -Doris provides atomic assurance for all import methods. It ensures that the data in the same import operation is valid for atoms. There will be no case of importing only part of the data. - -At the same time, each import job has a Label designated by the user or automatically generated by the system. Label is unique in a database. When an import job corresponding to a Label is successful enough, the import job cannot be submitted repeatedly using the Label. If the import job corresponding to Label fails, it can be reused. - -Users can use Label mechanism to ensure that the data corresponding to Label can be imported at most once, at the level of At-Most-One semantics. - - -## Synchronization and asynchronization - -Doris's current import methods fall into two categories, synchronous and asynchronous. If an external program accesses Doris's import function, it is necessary to determine which type of import mode is used and then determine the access logic. - -### Synchronization - -Synchronized import means that users create import tasks, Doris executes import synchronously, and returns user import results after execution. Users can directly determine whether the import is successful or not by synchronizing the results returned by creating the import task command. - -The import methods of synchronous type are **Stream load**, **Insert**. - -Operation steps: - -1. Users (external systems) create import tasks. -2. Doris returns the import result. -3. The user (external system) judges the import result and can submit the import task again if it fails. - -*Note: If the user returns the import synchronously and the amount of data imported is too large, it may take a long time to create the import request to return the result.* - -### Asynchronism -Asynchronous import means that after the user creates the import task, Doris directly returns to the successful creation. **Successful creation does not mean that data has been imported into**. The import task will be executed asynchronously. After successful creation, users need to send a polling command to check the status of the import job. If the creation fails, you can judge whether it needs to be created again based on the failure information. - -The ways to import asynchronous types are: **Broker load**, **Multi load**. - -Operation steps: - -1. Users (external systems) create import tasks. -2. Doris returns the import creation result. -3. User (external system) judges the result of import creation, success enters 4, failure returns to retry to create import, return to 1. -4. The user (external system) polls to see the import task until the status changes to FINISHED or CANCELLED. - -### Notes -Neither asynchronous nor synchronous import types should be retried endlessly after Doris returns an import failure or an import creation failure. **After a limited number of retries and failures, the external system retains the failure information. 
Most of the retries fail because of the problem of using method or data itself.** - -## Memory Limit - -Users can limit the memory usage of a single load by setting parameters to prevent the system from taking up too much memory and causing the system OOM. -Different load methods restrict memory in a slightly different way. You can refer to the respective load manuals for viewing. - -An load job is usually distributed across multiple Backends. The load memory limit is the memory usage of load job on a single Backend, not memory usage across the cluster. - -At the same time, each Backend sets the overall upper limit of the memory available for load. See the General System Configuration section below for specific configuration. This configuration limits the overall memory usage limit for all load tasks running on this Backend. - -Smaller memory limits can affect load efficiency because the load process can frequently write in-memory data back to disk because memory reaches the upper limit. Excessive memory limits can cause system OOM when load concurrency is high. Therefore, you need to properly set the load memory limit according to your needs. - -## Best Practices - -When users access Doris import, they usually use program access mode to ensure that data is imported into Doris regularly. Below is a brief description of the best practices for program access to Doris. - -1. Choose the appropriate import mode: According to the location of the data source, choose the import mode. For example, if raw data is stored on HDFS, import it using Broker load. -2. Protocol for determining the import mode: If Broker load import mode is selected, external systems need to be able to submit and view import jobs regularly using MySQL protocol. -3. Determine the type of import mode: import mode is synchronous or asynchronous. For example, Broker load is an asynchronous import mode. After submitting the creation import, the external system must call the check import command to determine whether the import is successful or not based on the results of the check import command. -4. Label generation strategy: Label generation strategy needs to be satisfied, and each batch of data is unique and fixed. Doris can then guarantee At-Most-Once. -5. The program itself guarantees At-Least-Once: The external system needs to guarantee its own At-Least-Once, so that Exactly-Once of the import process can be guaranteed. - -## General System Configuration - -The following sections explain several system-level configurations that are common to all imports. - -### FE configuration - -The following configuration belongs to the system configuration of FE, which can be modified by modifying the configuration file ``fe.conf``. - -+ max\_load\_timeout\_second and min\_load\_timeout\_second - - The two configurations mean the maximum import timeout time and the minimum import timeout time in seconds. The default maximum timeout time is 3 days and the default minimum timeout time is 1 second. User-defined import timeouts should not exceed this range. This parameter is applicable to all import modes. - -+ desired\_max\_waiting\_jobs - - The maximum number of imported tasks in the waiting queue is 100 by default. New import requests are rejected when the number of imports in the PENDING state (i.e. waiting for execution) in FE exceeds that value. - - This configuration is only valid for asynchronous execution of imports. 
When the number of import waiting for asynchronous execution exceeds the default value, subsequent creation of import requests will be rejected. - -+ max\_running\_txn\_num\_per\_db - - The implication of this configuration is that the maximum number of running load jobs in each database (no distinction between import types, uniform counting). The default value is 100. When the current database is running more than the maximum number of imports, subsequent imports will not be executed. If the job is imported synchronously, the import will be rejected. If it is an asynchronous import job. The job will wait in the queue. - -### BE configuration - -The following configuration belongs to the BE system configuration, which can be modified by modifying the BE configuration file `be.conf`. - -+ push\_write\_mbytes\_per\_sec - - Writing speed limit for a single Tablet on BE. The default is 10, or 10MB/s. Usually the maximum write speed of BE to a single Tablet is between 10 and 30 MB/s, depending on Schema and the system. This parameter can be adjusted appropriately to control the import speed. - -+ write\_buffer\_size - - The imported data will be written to a memtable on BE, and the memtable will not be written back to disk until it reaches the threshold. The default size is 100MB. Too small threshold may result in a large number of small files on BE. This threshold can be increased appropriately to reduce the number of files. However, excessive thresholds can lead to RPC timeouts, as shown in the configuration instructions below. - -+ tablet\_writer\_rpc\_timeout\_sec - - During the import process, a Batch (1024 rows) RPC timeout is sent. Default 600 seconds. Because the RPC may involve multiple memtable writes, it may cause RPC timeouts, which can be adjusted appropriately to reduce timeout errors (such as `send batch fail`). At the same time, if the `write_buffer_size` configuration is increased, this parameter needs to be adjusted appropriately. - -+ streaming\_load\_rpc\_max\_alive\_time\_sec - - During the import process, Doris opens a Writer for each Tablet to receive and write data. This parameter specifies Writer's waiting timeout time. If Writer does not receive any data at this time, Writer will be destroyed automatically. When the system processing speed is slow, Writer may not receive the next batch of data for a long time, resulting in import error: `Tablet Writer add batch with unknown id`. This configuration can be increased appropriately at this time. The default is 600 seconds. - -+ load\_process\_max\_memory\_limit\_bytes and load\_process\_max\_memory\_limit\_percent - - These two parameters limit the upper memory limit that can be used to load tasks on a single Backend. The maximum memory and maximum memory percentage are respectively. `load_process_max_memory_limit_percent` defaults to 80%, which is 80% of the `mem_limit` configuration. That is, if the physical memory is M, the default load memory limit is M * 80% * 80%. - -     `load_process_max_memory_limit_bytes` defaults to 100GB. The system takes the smaller of the two parameters as the final Backend load memory usage limit. - -+ label\_keep\_max\_second - - The retention time of load job which is FINISHED or CANCELLED. The record of load job will be kept in Doris system for a period of time which is determined by this parameter. The default time of this parameter is 3 days. This parameter is common to all types of load job. 
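Taken together, the BE-side parameters above are all adjusted in `be.conf`. The following is a minimal sketch with purely illustrative values, assuming a cluster where larger load batches keep hitting RPC timeouts; the defaults described above remain a reasonable starting point, and any change should be sized to your own hardware and load pattern:

```
# be.conf -- illustrative values only, not recommendations
write_buffer_size = 209715200                  # 200MB memtable flush threshold, to reduce small files
tablet_writer_rpc_timeout_sec = 1200           # raised together with write_buffer_size to avoid `send batch fail`
streaming_load_rpc_max_alive_time_sec = 1200   # keep Tablet Writers alive longer on a slow cluster
load_process_max_memory_limit_percent = 80     # upper bound of memory usable by load tasks on this BE
```

For example, with `mem_limit` at its default (80% of physical memory) and `load_process_max_memory_limit_percent` at 80, a BE on a 128 GB machine would cap load memory at roughly 128 GB * 80% * 80% ≈ 82 GB, or at `load_process_max_memory_limit_bytes` if that value is smaller.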
- -### Column mapping -Assuming that the imported data is `1, 2, 3` and the table has three columns of `c1, c2, c3`, if the data is directly imported into the table, you can use the following statement `COLUMNS(c1,c2,c3)` This statement is equivalent to `COLUMNS(tmp_c1,tmp_c2,tmp_c3,c1=tmp_c1,c2=tmp_c2,c3=tmp_c3)` -If you want to perform transformation or use temporary variables when importing data, the transformation or temporary variables must be specified in the order of use, for example, `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = tmp_c1 +1, c2= c1+1, c3 = c2+1)`, this statement is equivalent to `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = tmp_c1 +1, c2 = tmp_c1 +1+1, c3 =tmp_c1 +1+1+1)` -When using an expression, this expression must be defined in front. For example, the following statement is not legal `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = c1+1, c2 = temp + 1, temp = tmp_c1 +1, c3 =c2+1)` \ No newline at end of file diff --git a/docs/en/administrator-guide/load-data/routine-load-manual.md b/docs/en/administrator-guide/load-data/routine-load-manual.md deleted file mode 100644 index 8d54bc0480..0000000000 --- a/docs/en/administrator-guide/load-data/routine-load-manual.md +++ /dev/null @@ -1,334 +0,0 @@ ---- -{ - "title": "Routine Load", - "language": "en" -} ---- - - - -# Routine Load - -The Routine Load feature provides users with a way to automatically load data from a specified data source. - -This document describes the implementation principles, usage, and best practices of this feature. - -## Glossary - -* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. -* BE: Backend, the backend node of Doris. Responsible for query execution and data storage. -* RoutineLoadJob: A routine load job submitted by the user. -* JobScheduler: A routine load job scheduler for scheduling and dividing a RoutineLoadJob into multiple Tasks. -* Task: RoutineLoadJob is divided by JobScheduler according to the rules. -* TaskScheduler: Task Scheduler. Used to schedule the execution of a Task. - -## Principle - -``` - +---------+ - | Client | - +----+----+ - | -+-----------------------------+ -| FE | | -| +-----------v------------+ | -| | | | -| | Routine Load Job | | -| | | | -| +---+--------+--------+--+ | -| | | | | -| +---v--+ +---v--+ +---v--+ | -| | task | | task | | task | | -| +--+---+ +---+--+ +---+--+ | -| | | | | -+-----------------------------+ - | | | - v v v - +---+--+ +--+---+ ++-----+ - | BE | | BE | | BE | - +------+ +------+ +------+ - -``` - -As shown above, the client submits a routine load job to FE. - -FE splits an load job into several Tasks via JobScheduler. Each Task is responsible for loading a specified portion of the data. The Task is assigned by the TaskScheduler to the specified BE. - -On the BE, a Task is treated as a normal load task and loaded via the Stream Load load mechanism. After the load is complete, report to FE. - -The JobScheduler in the FE continues to generate subsequent new Tasks based on the reported results, or retry the failed Task. - -The entire routine load job completes the uninterrupted load of data by continuously generating new Tasks. - -## Kafka Routine load - -Currently we only support routine load from the Kafka system. This section details Kafka's routine use and best practices. - -### Usage restrictions - -1. Support unauthenticated Kafka access and Kafka clusters certified by SSL. -2. The supported message format is csv text or json format. 
Each message is a line in csv format, and the end of the line does not contain a ** line break. -3. Kafka 0.10.0 (inclusive) or above is supported by default. If you want to use Kafka versions below 0.10.0 (0.9.0, 0.8.2, 0.8.1, 0.8.0), you need to modify the configuration of be, set the value of kafka_broker_version_fallback to be the older version, or directly set the value of property.broker.version.fallback to the old version when creating routine load. The cost of the old version is that some of the new features of routine load may not be available, such as setting the offset of the kafka partition by time. - -### Create a routine load task - -The detailed syntax for creating a routine load task can be connected to Doris and execute `HELP ROUTINE LOAD;` to see the syntax help. Here is a detailed description of the precautions when creating a job. - -* columns_mapping - - `columns_mapping` is mainly used to specify the column structure of the table structure and message, as well as the conversion of some columns. If not specified, Doris will default to the columns in the message and the columns of the table structure in a one-to-one correspondence. Although under normal circumstances, if the source data is exactly one-to-one, normal data load can be performed without specifying. However, we still strongly recommend that users **explicitly specify column mappings**. This way, when the table structure changes (such as adding a nullable column), or the source file changes (such as adding a column), the load task can continue. Otherwise, after the above changes occur, the load will report an error because the column mapping relationship is no longer one-to-one. - - In `columns_mapping` we can also use some built-in functions for column conversion. But you need to pay attention to the actual column type corresponding to the function parameters. for example: - - Suppose the user needs to load a table containing only a column of `k1` with a column type of `int`. And you need to convert the null value in the source file to 0. This feature can be implemented with the `ifnull` function. The correct way to use is as follows: - - `COLUMNS (xx, k1=ifnull(xx, "3"))` - - Note that we use `"3"` instead of `3`, although `k1` is of type `int`. Because the column type in the source data is `varchar` for the load task, the `xx` virtual column is also of type `varchar`. So we need to use `"3"` to match the match, otherwise the `ifnull` function can't find the function signature with the parameter `(varchar, int)`, and an error will occur. - - As another example, suppose the user needs to load a table containing only a column of `k1` with a column type of `int`. And you need to process the corresponding column in the source file: convert the negative number to a positive number and the positive number to 100. This function can be implemented with the `case when` function. The correct wording should be as follows: - - `COLUMNS (xx, k1 = case when xx < 0 then cast(-xx as varchar) else cast((xx + '100') as varchar) end)` - - Note that we need to convert all the parameters in `case when` to varchar in order to get the desired result. - -* where_predicates - - The type of the column in `where_predicates` is already the actual column type, so there is no need to cast to the varchar type as `columns_mapping`. Write according to the actual column type. - -* desired\_concurrent\_number - - `desired_concurrent_number` is used to specify the degree of concurrency expected for a routine job. 
That is, a job, at most how many tasks are executing at the same time. For Kafka load, the current actual concurrency is calculated as follows: - - ``` - Min(partition num, desired_concurrent_number, Config.max_routine_load_task_concurrrent_num) - ``` - - Where `Config.max_routine_load_task_concurrrent_num` is a default maximum concurrency limit for the system. This is a FE configuration that can be adjusted by changing the configuration. The default is 5. - - Where partition num refers to the number of partitions for the Kafka topic subscribed to. - -* max\_batch\_interval/max\_batch\_rows/max\_batch\_size - - These three parameters are used to control the execution time of a single task. If any of the thresholds is reached, the task ends. Where `max_batch_rows` is used to record the number of rows of data read from Kafka. `max_batch_size` is used to record the amount of data read from Kafka in bytes. The current consumption rate for a task is approximately 5-10MB/s. - - So assume a row of data 500B, the user wants to be a task every 100MB or 10 seconds. The expected processing time for 100MB is 10-20 seconds, and the corresponding number of rows is about 200000 rows. Then a reasonable configuration is: - - ``` - "max_batch_interval" = "10", - "max_batch_rows" = "200000", - "max_batch_size" = "104857600" - ``` - - The parameters in the above example are also the default parameters for these configurations. - -* max\_error\_number - - `max_error_number` is used to control the error rate. When the error rate is too high, the job will automatically pause. Because the entire job is stream-oriented, and because of the borderless nature of the data stream, we can't calculate the error rate with an error ratio like other load tasks. So here is a new way of calculating to calculate the proportion of errors in the data stream. - - We have set up a sampling window. The size of the window is `max_batch_rows * 10`. Within a sampling window, if the number of error lines exceeds `max_error_number`, the job is suspended. If it is not exceeded, the next window restarts counting the number of error lines. - - We assume that `max_batch_rows` is 200000 and the window size is 2000000. Let `max_error_number` be 20000, that is, the user expects an error behavior of 20000 for every 2000000 lines. That is, the error rate is 1%. But because not every batch of tasks consumes 200000 rows, the actual range of the window is [2000000, 2200000], which is 10% statistical error. - - The error line does not include rows that are filtered out by the where condition. But include rows that do not have a partition in the corresponding Doris table. - -* data\_source\_properties - - The specific Kafka partition can be specified in `data_source_properties`. If not specified, all partitions of the subscribed topic are consumed by default. - - Note that when partition is explicitly specified, the load job will no longer dynamically detect changes to Kafka partition. If not specified, the partitions that need to be consumed are dynamically adjusted based on changes in the kafka partition. - -* strict\_mode - - Routine load load can turn on strict mode mode. The way to open it is to add ```"strict_mode" = "true"``` to job\_properties. The default strict mode is off. - - The strict mode mode means strict filtering of column type conversions during the load process. The strict filtering strategy is as follows: - - 1. For column type conversion, if strict mode is true, the wrong data will be filtered. 
The error data here refers to the fact that the original data is not null, and the result is a null value after participating in the column type conversion. - - 2. When a loaded column is generated by a function transformation, strict mode has no effect on it. - - 3. For a column type loaded with a range limit, if the original data can pass the type conversion normally, but cannot pass the range limit, strict mode will not affect it. For example, if the type is decimal(1,0) and the original data is 10, it is eligible for type conversion but not for column declarations. This data strict has no effect on it. - -* merge\_type - The type of data merging supports three types: APPEND, DELETE, and MERGE. APPEND is the default value, which means that all this batch of data needs to be appended to the existing data. DELETE means to delete all rows with the same key as this batch of data. MERGE semantics Need to be used in conjunction with the delete condition, which means that the data that meets the delete condition is processed according to DELETE semantics and the rest is processed according to APPEND semantics - - -#### strict mode and load relationship of source data - -Here is an example of a column type of TinyInt. - -> Note: When a column in a table allows a null value to be loaded - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|---------| -|null | \N | N/A | true or false | NULL| -|not null | aaa or 2000 | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 | 1 | true or false | correct data| - -Here the column type is Decimal(1,0) -  -> Note: When a column in a table allows a null value to be loaded - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|--------| -|null | \N | N/A | true or false | NULL| -|not null | aaa | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 or 10 | 1 | true or false | correct data| - -> Note: 10 Although it is a value that is out of range, because its type meets the requirements of decimal, strict mode has no effect on it. 10 will eventually be filtered in other ETL processing flows. But it will not be filtered by strict mode. - -#### Accessing SSL-certified Kafka clusters - -Accessing the SSL-certified Kafka cluster requires the user to provide a certificate file (ca.pem) for authenticating the Kafka Broker public key. If the Kafka cluster has both client authentication enabled, you will also need to provide the client's public key (client.pem), key file (client.key), and key password. The files needed here need to be uploaded to Doris via the `CREAE FILE` command, **and the catalog name is `kafka`**. See `HELP CREATE FILE;` for specific help on the `CREATE FILE` command. Here is an example: - -1. Upload file - - ``` - CREATE FILE "ca.pem" PROPERTIES("url" = "https://example_url/kafka-key/ca.pem", "catalog" = "kafka"); - CREATE FILE "client.key" PROPERTIES("url" = "https://example_urlkafka-key/client.key", "catalog" = "kafka"); - CREATE FILE "client.pem" PROPERTIES("url" = "https://example_url/kafka-key/client.pem", "catalog" = "kafka"); - ``` - -2. 
Create a routine load job - - ``` - CREATE ROUTINE LOAD db1.job1 on tbl1 - PROPERTIES - ( - "desired_concurrent_number"="1" - ) - FROM KAFKA - ( - "kafka_broker_list"= "broker1:9091,broker2:9091", - "kafka_topic" = "my_topic", - "property.security.protocol" = "ssl", - "property.ssl.ca.location" = "FILE:ca.pem", - "property.ssl.certificate.location" = "FILE:client.pem", - "property.ssl.key.location" = "FILE:client.key", - "property.ssl.key.password" = "abcdefg" - ); - ``` - -> Doris accesses Kafka clusters via Kafka's C++ API `librdkafka`. The parameters supported by `librdkafka` can be found. -> -> - -### Viewing the status of the load job - -Specific commands and examples for viewing the status of the **job** can be viewed with the `HELP SHOW ROUTINE LOAD;` command. - -Specific commands and examples for viewing the **Task** status can be viewed with the `HELP SHOW ROUTINE LOAD TASK;` command. - -You can only view tasks that are currently running, and tasks that have ended and are not started cannot be viewed. - -### Alter job - -Users can modify jobs that have been created. Specific instructions can be viewed through the `HELP ALTER ROUTINE LOAD;` command. Or refer to [ALTER ROUTINE LOAD](../../sql-reference/sql-statements/Data%20Manipulation/alter-routine-load.md). - -### Job Control - -The user can control the stop, pause and restart of the job by the three commands `STOP/PAUSE/RESUME`. You can view help and examples with the three commands `HELP STOP ROUTINE LOAD;`, `HELP PAUSE ROUTINE LOAD;` and `HELP RESUME ROUTINE LOAD;`. - -## other instructions - -1. The relationship between a routine load job and an ALTER TABLE operation - - * Routine load does not block SCHEMA CHANGE and ROLLUP operations. Note, however, that if the column mappings are not matched after SCHEMA CHANGE is completed, the job's erroneous data will spike and eventually cause the job to pause. It is recommended to reduce this type of problem by explicitly specifying column mappings in routine load jobs and by adding Nullable columns or columns with Default values. - * Deleting a Partition of a table may cause the loaded data to fail to find the corresponding Partition and the job will be paused. - -2. Relationship between routine load jobs and other load jobs (LOAD, DELETE, INSERT) - - * Routine load does not conflict with other LOAD jobs and INSERT operations. - * When performing a DELETE operation, the corresponding table partition cannot have any load tasks being executed. Therefore, before performing the DELETE operation, you may need to pause the routine load job and wait for the delivered task to complete before you can execute DELETE. - -3. Relationship between routine load jobs and DROP DATABASE/TABLE operations - - When the corresponding database or table is deleted, the job will automatically CANCEL. - -4. The relationship between the kafka type routine load job and kafka topic - - When the user creates a routine load declaration, the `kafka_topic` does not exist in the kafka cluster. - - * If the broker of the user kafka cluster has `auto.create.topics.enable = true` set, `kafka_topic` will be automatically created first, and the number of partitions created automatically will be in the kafka cluster** of the user side. The broker is configured with `num.partitions`. The routine job will continue to read the data of the topic continuously. 
- * If the broker of the user kafka cluster has `auto.create.topics.enable = false` set, topic will not be created automatically, and the routine will be paused before any data is read, with the status `PAUSED`. - - So, if the user wants to be automatically created by the routine when the kafka topic does not exist, just set the broker in **the kafka cluster of the user's side** to set auto.create.topics.enable = true` . - -5. Problems that may occur in the some environment - In some environments, there are isolation measures for network segment and domain name resolution. So should pay attention to: - 1. The broker list specified in the routine load task must be accessible on the doris environment. - 2. If `advertised.listeners` is configured in kafka, The addresses in `advertised.listeners` need to be accessible on the doris environment. - -6. About specified Partition and Offset - - Doris supports specifying Partition and Offset to start consumption. The new version also supports the consumption function at a specified time point. The configuration relationship of the corresponding parameters is explained here. - - There are three relevant parameters: - - * `kafka_partitions`: Specify the list of partitions to be consumed, such as: "0, 1, 2, 3". - * `kafka_offsets`: Specify the starting offset of each partition, which must correspond to the number of `kafka_partitions` lists. Such as: "1000, 1000, 2000, 2000" - * `property.kafka_default_offset`: Specify the default starting offset of the partition. - - When creating an routine load job, these three parameters can have the following combinations: - - | Combinations | `kafka_partitions` | `kafka_offsets` | `property.kafka_default_offset` | Behavior | - |---|---|---|---|---| - |1| No | No | No | The system will automatically find all the partitions corresponding to the topic and start consumption from OFFSET_END | - |2| No | No | Yes | The system will automatically find all the partitions corresponding to the topic and start consumption from the position specified by the default offset | - |3| Yes | No | No | The system will start consumption from the OFFSET_END of the specified partition | - |4| Yes | Yes | No | The system will start consumption from the specified offset of the specified partition | - |5| Yes | No | Yes | The system will start consumption from the specified partition and the location specified by the default offset | - - 7. The difference between STOP and PAUSE - - the FE will automatically clean up stopped ROUTINE LOAD,while paused ROUTINE LOAD can be resumed - -## Related parameters - -Some system configuration parameters can affect the use of routine loads. - -1. max\_routine\_load\_task\_concurrent\_num - - The FE configuration item, which defaults to 5, can be modified at runtime. This parameter limits the maximum number of subtask concurrency for a routine load job. It is recommended to maintain the default value. If the setting is too large, it may cause too many concurrent tasks and occupy cluster resources. - -2. max\_routine_load\_task\_num\_per\_be - - The FE configuration item, which defaults to 5, can be modified at runtime. This parameter limits the number of subtasks that can be executed concurrently by each BE node. It is recommended to maintain the default value. If the setting is too large, it may cause too many concurrent tasks and occupy cluster resources. - -3. max\_routine\_load\_job\_num - - The FE configuration item, which defaults to 100, can be modified at runtime. 
This parameter limits the total number of routine load jobs in the NEED_SCHEDULED, RUNNING, and PAUSED states. Once this limit is exceeded, new jobs cannot be submitted.

4. max\_consumer\_num\_per\_group

    BE configuration item, the default is 3. This parameter sets the maximum number of consumers generated within one subtask for data consumption. For a Kafka data source, one consumer may consume one or more kafka partitions. Suppose a task needs to consume 6 kafka partitions; then 3 consumers are generated and each consumer consumes 2 partitions. If there are only 2 partitions, only 2 consumers are generated and each consumer consumes 1 partition.

5. push\_write\_mbytes\_per\_sec

    BE configuration item. The default is 10, which is 10MB/s. This is a general load parameter and is not limited to routine load jobs. It limits the speed at which loaded data is written to disk. For high-performance storage devices such as SSDs, this speed limit can be increased appropriately.

6. max\_tolerable\_backend\_down\_num

    FE configuration item, the default is 0. Under certain conditions, Doris can reschedule PAUSED jobs so that they become RUNNING again. A value of 0 means that rescheduling is allowed only when all BE nodes are alive.

7. period\_of\_auto\_resume\_min

    FE configuration item, the default is 5 minutes. Within each 5-minute period, Doris attempts to reschedule at most 3 times. If all 3 attempts fail, the current job is locked and no further automatic scheduling is performed. Manual intervention is still possible.

diff --git a/docs/en/administrator-guide/load-data/s3-load-manual.md b/docs/en/administrator-guide/load-data/s3-load-manual.md
deleted file mode 100644
index b9c2b2a856..0000000000
--- a/docs/en/administrator-guide/load-data/s3-load-manual.md
+++ /dev/null
@@ -1,93 +0,0 @@
---
{
"title": "S3 Load",
"language": "en"
}
---

# S3 Load

Starting from version 0.14, Doris supports importing data directly from online storage systems that support the S3 protocol.

This document mainly introduces how to import data stored in AWS S3. Other object storage systems that support the S3 protocol are also supported, such as Baidu Cloud's BOS, Alibaba Cloud's OSS, and Tencent Cloud's COS.

## Applicable scenarios

* The source data is in a storage system accessible over the S3 protocol, such as S3 or BOS.
* Data volumes range from tens to hundreds of GB.

## Preparing
1. Standard AK and SK
    First, find or regenerate your AWS `Access keys`. The generation method can be found under `My Security Credentials` in the AWS console, as shown in the following figure:
    ![AK_SK](/images/aws_ak_sk.png)
    Select `Create New Access Key`, and be sure to save the generated AK and SK.
2. Prepare REGION and ENDPOINT
    REGION can be selected when creating the bucket or can be viewed in the bucket list.
ENDPOINT can be found through REGION on the following page [AWS Documentation](https://docs.aws.amazon.com/general/latest/gr/s3.html#s3_region) - -Other cloud storage systems can find relevant information compatible with S3 in corresponding documents - -## Start Loading -Like Broker Load just replace `WITH BROKER broker_name ()` with -``` - WITH S3 - ( - "AWS_ENDPOINT" = "AWS_ENDPOINT", - "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY", - "AWS_SECRET_KEY"="AWS_SECRET_KEY", - "AWS_REGION" = "AWS_REGION" - ) -``` - -example: -``` - LOAD LABEL example_db.exmpale_label_1 - ( - DATA INFILE("s3://your_bucket_name/your_file.txt") - INTO TABLE load_test - COLUMNS TERMINATED BY "," - ) - WITH S3 - ( - "AWS_ENDPOINT" = "AWS_ENDPOINT", - "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY", - "AWS_SECRET_KEY"="AWS_SECRET_KEY", - "AWS_REGION" = "AWS_REGION" - ) - PROPERTIES - ( - "timeout" = "3600" - ); -``` - -## FAQ - -S3 SDK uses virtual-hosted style by default. However, some object storage systems may not be enabled or support virtual-hosted style access. At this time, we can add the `use_path_style` parameter to force the use of path style: - -``` - WITH S3 - ( - "AWS_ENDPOINT" = "AWS_ENDPOINT", - "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY", - "AWS_SECRET_KEY"="AWS_SECRET_KEY", - "AWS_REGION" = "AWS_REGION", - "use_path_style" = "true" - ) -``` diff --git a/docs/en/administrator-guide/load-data/sequence-column-manual.md b/docs/en/administrator-guide/load-data/sequence-column-manual.md deleted file mode 100644 index aeb62e3621..0000000000 --- a/docs/en/administrator-guide/load-data/sequence-column-manual.md +++ /dev/null @@ -1,208 +0,0 @@ ---- -{ - "title": "Sequence Column", - "language": "en" -} ---- - - - -# Sequence Column -The Sequence Column currently only supports the Uniq model. The Uniq model is mainly for scenarios requiring a unique primary key, which can guarantee the uniqueness constraint of the primary key. However, due to the use of REPLACE aggregation, the replacement sequence is not guaranteed for data imported in the same batch, which can be described in detail [here](../../getting-started/data-model-rollup.md). If the order of substitution is not guaranteed, then the specific data that is finally imported into the table cannot be determined, and there is uncertainty. - -To solve this problem, Doris supported a sequence column by allowing the user to specify the sequence column when importing. Under the same key column, columns of the REPLACE aggregate type will be replaced according to the value of the sequence column, larger values can be replaced with smaller values, and vice versa. In this method, the order is determined by the user, and the user controls the replacement order. - -## Principle - -Implemented by adding a hidden column `__DORIS_SEQUENCE_COL__`, the type of the column is specified by the user while create the table, determines the specific value of the column on import, and replaces the REPLACE column with that value. - -### Create Table -When you create the Uniq table, a hidden column `__DORIS_SEQUENCE_COL__` is automatically added, depending on the type specified by the user - -### Import - -When importing, fe sets the value of the hidden column during parsing to the value of the 'order by' expression (Broker Load and routine Load), or the value of the 'function_column.sequence_col' expression (stream load), and the value column will be replaced according to this value. The value of the hidden column `__DORIS_SEQUENCE_COL__` can be set as a column in the source data or in the table structure. 
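To make this mapping concrete, the fragments below sketch where the sequence value comes from for each load type; the column name `source_sequence` is purely illustrative, and the complete statements appear in the per-load-type syntax sections later in this document:

```
# Broker Load / Routine Load: the hidden column takes the value of the ORDER BY expression
(k1, k2, source_sequence, v1, v2)
ORDER BY source_sequence

# Stream Load: the hidden column takes the value of the function_column.sequence_col header
-H "function_column.sequence_col: source_sequence"
```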
- -### Read - -The request with the value column needs to read the additional column of `__DORIS_SEQUENCE_COL__`, which is used as a basis for the order of replacement aggregation function replacement under the same key column, with the larger value replacing the smaller value and not the reverse. - -### Cumulative Compaction - -Cumulative Compaction works in the same way as the reading process - -### Base Compaction - -Base Compaction works in the same way as the reading process - -### Syntax -The syntax aspect of the table construction adds a property to the property identifying the type of `__DORIS_SEQUENCE_COL__`. -The syntax design aspect of the import is primarily the addition of a mapping from the sequence column to other columns, the settings of each import mode are described below - -#### Create Table -When you create the Uniq table, you can specify the sequence column type -``` -PROPERTIES ( - "function_column.sequence_type" = 'Date', -); -``` -The sequence_type is used to specify the type of the sequence column, which can be integral and time - -#### stream load - -The syntax of the stream load is to add the mapping of hidden columns corresponding to source_sequence in the 'function_column.sequence_col' field in the header, for example -``` -curl --location-trusted -u root -H "columns: k1,k2,source_sequence,v1,v2" -H "function_column.sequence_col: source_sequence" -T testData http://host:port/api/testDb/testTbl/_stream_load -``` - -#### broker load - -Set the source_sequence field for the hidden column map at `ORDER BY` - -``` -LOAD LABEL db1.label1 -( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl1` - COLUMNS TERMINATED BY "," - (k1,k2,source_sequence,v1,v2) - ORDER BY source_sequence -) -WITH BROKER 'broker' -( - "username"="user", - "password"="pass" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -#### routine load - -The mapping method is the same as above, as shown below - -``` - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - [WITH MERGE|APPEND|DELETE] - COLUMNS(k1, k2, source_sequence, v1, v2), - WHERE k1 > 100 and k2 like "%doris%" - [ORDER BY source_sequence] - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,0,200" - ); -``` - -## Enable sequence column support -If `function_column.sequence_type` is set when creating a new table, then the sequence column will be supported. -For a table that does not support sequence column, use the following statement if you would like to use this feature: -`ALTER TABLE example_db.my_table ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date")` to enable. -If you want to determine if a table supports sequence column, you can set the session variable to display the hidden column `SET show_hidden_columns=true`, followed by `desc Tablename`, if the output contains the column `__DORIS_SEQUENCE_COL__`, it is supported, if not, it is not supported - -## Usage example -Let's take the stream Load as an example to show how to use it -1. Create a table that supports sequence column. 
- -The table structure is shown below -``` -MySQL > desc test_table; -+-------------+--------------+------+-------+---------+---------+ -| Field | Type | Null | Key | Default | Extra | -+-------------+--------------+------+-------+---------+---------+ -| user_id | BIGINT | No | true | NULL | | -| date | DATE | No | true | NULL | | -| group_id | BIGINT | No | true | NULL | | -| modify_date | DATE | No | false | NULL | REPLACE | -| keyword | VARCHAR(128) | No | false | NULL | REPLACE | -+-------------+--------------+------+-------+---------+---------+ -``` - -2. Import data normally: - -Import the following data -``` -1 2020-02-22 1 2020-02-22 a -1 2020-02-22 1 2020-02-22 b -1 2020-02-22 1 2020-03-05 c -1 2020-02-22 1 2020-02-26 d -1 2020-02-22 1 2020-02-22 e -1 2020-02-22 1 2020-02-22 b -``` -Take the Stream Load as an example here and map the sequence column to the modify_date column -``` -curl --location-trusted -u root: -H "function_column.sequence_col: modify_date" -T testData http://host:port/api/test/test_table/_stream_load -``` -The results is -``` -MySQL > select * from test_table; -+---------+------------+----------+-------------+---------+ -| user_id | date | group_id | modify_date | keyword | -+---------+------------+----------+-------------+---------+ -| 1 | 2020-02-22 | 1 | 2020-03-05 | c | -+---------+------------+----------+-------------+---------+ -``` -In this import, the c is eventually retained in the keyword column because the value of the sequence column (the value in modify_date) is the maximum value: '2020-03-05'. - -3. Guarantee of substitution order - -After the above steps are completed, import the following data -``` -1 2020-02-22 1 2020-02-22 a -1 2020-02-22 1 2020-02-23 b -``` -Query data -``` -MySQL [test]> select * from test_table; -+---------+------------+----------+-------------+---------+ -| user_id | date | group_id | modify_date | keyword | -+---------+------------+----------+-------------+---------+ -| 1 | 2020-02-22 | 1 | 2020-03-05 | c | -+---------+------------+----------+-------------+---------+ -``` -Because the sequence column for the newly imported data are all smaller than the values already in the table, they cannot be replaced -Try importing the following data again -``` -1 2020-02-22 1 2020-02-22 a -1 2020-02-22 1 2020-03-23 w -``` -Query data -``` -MySQL [test]> select * from test_table; -+---------+------------+----------+-------------+---------+ -| user_id | date | group_id | modify_date | keyword | -+---------+------------+----------+-------------+---------+ -| 1 | 2020-02-22 | 1 | 2020-03-23 | w | -+---------+------------+----------+-------------+---------+ -``` -At this point, you can replace the original data in the table \ No newline at end of file diff --git a/docs/en/administrator-guide/load-data/spark-load-manual.md b/docs/en/administrator-guide/load-data/spark-load-manual.md deleted file mode 100644 index 3534be710d..0000000000 --- a/docs/en/administrator-guide/load-data/spark-load-manual.md +++ /dev/null @@ -1,632 +0,0 @@ ---- -{ - "title": "Spark Load", - "language": "en" -} ---- - - - -# Spark Load - -Spark load realizes the preprocessing of load data by spark, improves the performance of loading large amount of Doris data and saves the computing resources of Doris cluster. It is mainly used for the scene of initial migration and large amount of data imported into Doris. - -Spark load is an asynchronous load method. Users need to create spark type load job by MySQL protocol and view the load results by `show load`. 
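For instance, after a job has been submitted, its state can be checked with a statement such as the following (the label is illustrative):

```
-- "label1" is only an example label
SHOW LOAD WHERE LABEL = "label1";
```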
- -## Applicable scenarios - -* The source data is in a file storage system that spark can access, such as HDFS. - -* The data volume ranges from tens of GB to TB. - -## Explanation of terms - -1. Frontend (FE): metadata and scheduling node of Doris system. In the load process, it is mainly responsible for the scheduling of load jobs. - -2. Backend (be): the computing and storage node of Doris system. In the load process, it is mainly responsible for data writing and storage. - -3. Spark ETL: in the load process, it is mainly responsible for ETL of data, including global dictionary construction (bitmap type), partition, sorting, aggregation, etc. - -4. Broker: broker is an independent stateless process. It encapsulates the file system interface and provides the ability of Doris to read the files in the remote storage system. - -5. Global dictionary: it stores the data structure from the original value to the coded value. The original value can be any data type, while the encoded value is an integer. The global dictionary is mainly used in the scene of precise de duplication precomputation. - -## Basic principles - -### Basic process - -The user submits spark type load job by MySQL client, Fe records metadata and returns that the user submitted successfully. - -The implementation of spark load task is mainly divided into the following five stages. - - -1. Fe schedules and submits ETL tasks to spark cluster for execution. - -2. Spark cluster executes ETL to complete the preprocessing of load data. It includes global dictionary building (bitmap type), partitioning, sorting, aggregation, etc. - -3. After the ETL task is completed, Fe obtains the data path of each partition that has been preprocessed, and schedules the related be to execute the push task. - -4. Be reads data through broker and converts it into Doris underlying storage format. - -5. Fe schedule the effective version and complete the load job. - -``` - + - | 0. User create spark load job - +----v----+ - | FE |---------------------------------+ - +----+----+ | - | 3. FE send push tasks | - | 5. FE publish version | - +------------+------------+ | - | | | | -+---v---+ +---v---+ +---v---+ | -| BE | | BE | | BE | |1. FE submit Spark ETL job -+---^---+ +---^---+ +---^---+ | - |4. BE push with broker | | -+---+---+ +---+---+ +---+---+ | -|Broker | |Broker | |Broker | | -+---^---+ +---^---+ +---^---+ | - | | | | -+---+------------+------------+---+ 2.ETL +-------------v---------------+ -| HDFS +-------> Spark cluster | -| <-------+ | -+---------------------------------+ +-----------------------------+ - -``` - -## Global dictionary - -### Applicable scenarios - -At present, the bitmap column in Doris is implemented using the class library `roaingbitmap`, while the input data type of `roaringbitmap` can only be integer. Therefore, if you want to pre calculate the bitmap column in the import process, you need to convert the type of input data to integer. - -In the existing Doris import process, the data structure of global dictionary is implemented based on hive table, which stores the mapping from original value to encoded value. - -### Build process - -1. Read the data from the upstream data source and generate a hive temporary table, which is recorded as `hive_table`. - -2. Extract the de duplicated values of the fields to be de duplicated from the `hive_table`, and generate a new hive table, which is marked as `distinct_value_table`. - -3. 
Create a new global dictionary table named `dict_table`; one column is the original value, and the other is the encoded value. - -4. Left join the `distinct_value_table` and `dict_table`, calculate the new de duplication value set, and then code this set with window function. At this time, the original value of the de duplication column will have one more column of encoded value. Finally, the data of these two columns will be written back to `dict_table`. - -5. Join the `dict_table` with the `hive_table` to replace the original value in the `hive_table` with the integer encoded value. - -6. `hive_table` will be read by the next data preprocessing process and imported into Doris after calculation. - -## Data preprocessing (DPP) - -### Basic process - -1. Read data from the data source. The upstream data source can be HDFS file or hive table. - -2. Map the read data, calculate the expression, and generate the bucket field `bucket_id` according to the partition information. - -3. Generate rolluptree according to rollup metadata of Doris table. - -4. Traverse rolluptree to perform hierarchical aggregation. The rollup of the next level can be calculated from the rollup of the previous level. - -5. After each aggregation calculation, the data will be calculated according to the `bucket_id`is divided into buckets and then written into HDFS. - -6. Subsequent brokers will pull the files in HDFS and import them into Doris be. - -## Hive Bitmap UDF - -Spark supports loading hive-generated bitmap data directly into Doris, see [hive-bitmap-udf documentation](../../extending-doris/hive-bitmap-udf.md) - -## Basic operation - -### Configure ETL cluster - -As an external computing resource, spark is used to complete ETL work in Doris. In the future, there may be other external resources that will be used in Doris, such as spark / GPU for query, HDFS / S3 for external storage, MapReduce for ETL, etc. Therefore, we introduce resource management to manage these external resources used by Doris. - -Before submitting the spark import task, you need to configure the spark cluster that performs the ETL task. - -Grammar: - -```sql --- create spark resource -CREATE EXTERNAL RESOURCE resource_name -PROPERTIES -( - type = spark, - spark_conf_key = spark_conf_value, - working_dir = path, - broker = broker_name, - broker.property_key = property_value -) - --- drop spark resource -DROP RESOURCE resource_name - --- show resources -SHOW RESOURCES -SHOW PROC "/resources" - --- privileges -GRANT USAGE_PRIV ON RESOURCE resource_name TO user_identity -GRANT USAGE_PRIV ON RESOURCE resource_name TO ROLE role_name - -REVOKE USAGE_PRIV ON RESOURCE resource_name FROM user_identity -REVOKE USAGE_PRIV ON RESOURCE resource_name FROM ROLE role_name -``` - -#### Create resource - -`resource_name` is the name of the spark resource configured in Doris. - -`Properties` are the parameters related to spark resources, as follows: - -- `type`: resource type, required. Currently, only spark is supported. - -- Spark related parameters are as follows: - - - `spark.master`: required, yarn is supported at present, `spark://host:port`. - - - `spark.submit.deployMode`: the deployment mode of Spark Program. It is required and supports cluster and client. - - - `spark.hadoop.yarn.resourcemanager.address`: required when master is yarn. - - - `spark.hadoop.fs.defaultfs`: required when master is yarn. - - - Other parameters are optional, refer to `http://spark.apache.org/docs/latest/configuration.html` - -- `working_dir`: directory used by ETL. 
Spark is required when used as an ETL resource. For example: `hdfs://host:port/tmp/doris`. - -- `broker`: the name of the broker. Spark is required when used as an ETL resource. You need to use the 'alter system add broker' command to complete the configuration in advance. - -- `broker.property_key`: the authentication information that the broker needs to specify when reading the intermediate file generated by ETL. - -Example: - -```sql --- yarn cluster 模式 -CREATE EXTERNAL RESOURCE "spark0" -PROPERTIES -( - "type" = "spark", - "spark.master" = "yarn", - "spark.submit.deployMode" = "cluster", - "spark.jars" = "xxx.jar,yyy.jar", - "spark.files" = "/tmp/aaa,/tmp/bbb", - "spark.executor.memory" = "1g", - "spark.yarn.queue" = "queue0", - "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999", - "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000", - "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris", - "broker" = "broker0", - "broker.username" = "user0", - "broker.password" = "password0" -); - --- spark standalone client 模式 -CREATE EXTERNAL RESOURCE "spark1" -PROPERTIES -( - "type" = "spark", - "spark.master" = "spark://127.0.0.1:7777", - "spark.submit.deployMode" = "client", - "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris", - "broker" = "broker1" -); -``` - -#### Show resources - -Ordinary accounts can only see the resources that they have `USAGE_PRIV` to use. - -The root and admin accounts can see all the resources. - -#### Resource privilege - -Resource permissions are managed by grant revoke. Currently, only `USAGE_PRIV` permission is supported. - -You can use the `USAGE_PRIV` permission is given to a user or a role, and the role is used the same as before. - -```sql --- Grant permission to the spark0 resource to user user0 - -GRANT USAGE_PRIV ON RESOURCE "spark0" TO "user0"@"%"; - - --- Grant permission to the spark0 resource to role ROLE0 - -GRANT USAGE_PRIV ON RESOURCE "spark0" TO ROLE "role0"; - - --- Grant permission to all resources to user user0 - -GRANT USAGE_PRIV ON RESOURCE * TO "user0"@"%"; - - --- Grant permission to all resources to role ROLE0 - -GRANT USAGE_PRIV ON RESOURCE * TO ROLE "role0"; - - --- Revoke the spark0 resource permission of user user0 - -REVOKE USAGE_PRIV ON RESOURCE "spark0" FROM "user0"@"%"; - -``` - -### Configure spark client - -The Fe submits the spark task by executing the spark submit command. Therefore, it is necessary to configure the spark client for Fe. It is recommended to use the official version of spark 2 above 2.4.5, [download spark here](https://archive.apache.org/dist/spark/). After downloading, please follow the steps to complete the following configuration. - -#### Configure SPARK_HOME environment variable - -Place the spark client on the same machine as Fe and configure `spark_home_default_dir` in the `fe.conf`. This configuration item defaults to the `fe/lib/spark2x` path. This config cannot be empty. - -#### Configure spark dependencies - -Package all jar packages in jars folder under spark client root path into a zip file, and configure `spark_resource_patj` in `fe.conf` as this zip file's path. - -When the spark load task is submitted, this zip file will be uploaded to the remote repository, and the default repository path will be hung in `working_dir/{cluster_ID}` directory named as `__spark_repository__{resource_name}`, which indicates that a resource in the cluster corresponds to a remote warehouse. 
The directory structure of the remote warehouse is as follows: - -``` -__spark_repository__spark0/ - |-__archive_1.0.0/ - | |-__lib_990325d2c0d1d5e45bf675e54e44fb16_spark-dpp-1.0.0-jar-with-dependencies.jar - | |-__lib_7670c29daf535efe3c9b923f778f61fc_spark-2x.zip - |-__archive_1.1.0/ - | |-__lib_64d5696f99c379af2bee28c1c84271d5_spark-dpp-1.1.0-jar-with-dependencies.jar - | |-__lib_1bbb74bb6b264a270bc7fca3e964160f_spark-2x.zip - |-__archive_1.2.0/ - | |-... -``` - -In addition to spark dependency (named by `spark-2x.zip` by default), Fe will also upload DPP's dependency package to the remote repository. If all the dependency files submitted by spark load already exist in the remote repository, then there is no need to upload dependency, saving the time of repeatedly uploading a large number of files each time. - -### Configure yarn client - -The Fe obtains the running application status and kills the application by executing the yarn command. Therefore, you need to configure the yarn client for Fe. It is recommended to use the official version of Hadoop above 2.5.2, [download hadoop](https://archive.apache.org/dist/hadoop/common/). After downloading, please follow the steps to complete the following configuration. - -#### Configure the yarn client path - -Place the downloaded yarn client in the same machine as Fe, and configure `yarn_client_path` in the `fe.conf` as the executable file of yarn, which is set as the `fe/lib/yarn-client/hadoop/bin/yarn` by default. - -(optional) when Fe obtains the application status or kills the application through the yarn client, the configuration files required for executing the yarn command will be generated by default in the `lib/yarn-config` path in the Fe root directory. This path can be configured by configuring `yarn-config-dir` in the `fe.conf`. The currently generated configuration yarn config files include `core-site.xml` and `yarn-site.xml`. - -### Create load - -Grammar: - -```sql -LOAD LABEL load_label - (data_desc, ...) - WITH RESOURCE resource_name resource_properties - [PROPERTIES (key1=value1, ... )] - -* load_label: - db_name.label_name - -* data_desc: - DATA INFILE ('file_path', ...) - [NEGATIVE] - INTO TABLE tbl_name - [PARTITION (p1, p2)] - [COLUMNS TERMINATED BY separator ] - [(col1, ...)] - [SET (k1=f1(xx), k2=f2(xx))] - [WHERE predicate] - -* resource_properties: - (key2=value2, ...) 
-``` - -Example 1: when the upstream data source is HDFS file - -```sql -LOAD LABEL db1.label1 -( - DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1") - INTO TABLE tbl1 - COLUMNS TERMINATED BY "," - (tmp_c1,tmp_c2) - SET - ( - id=tmp_c2, - name=tmp_c1 - ), - DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2") - INTO TABLE tbl2 - COLUMNS TERMINATED BY "," - (col1, col2) - where col1 > 1 -) -WITH RESOURCE 'spark0' -( - "spark.executor.memory" = "2g", - "spark.shuffle.compress" = "true" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -Example 2: when the upstream data source is hive table - -```sql -step 1:新建hive外部表 -CREATE EXTERNAL TABLE hive_t1 -( - k1 INT, - K2 SMALLINT, - k3 varchar(50), - uuid varchar(100) -) -ENGINE=hive -properties -( -"database" = "tmp", -"table" = "t1", -"hive.metastore.uris" = "thrift://0.0.0.0:8080" -); - -step 2: 提交load命令 -LOAD LABEL db1.label1 -( - DATA FROM TABLE hive_t1 - INTO TABLE tbl1 - (k1,k2,k3) - SET - ( - uuid=bitmap_dict(uuid) - ) -) -WITH RESOURCE 'spark0' -( - "spark.executor.memory" = "2g", - "spark.shuffle.compress" = "true" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -Example 3: when the upstream data source is hive binary type table - -```sql -step 1: create hive external table -CREATE EXTERNAL TABLE hive_t1 -( - k1 INT, - K2 SMALLINT, - k3 varchar(50), - uuid varchar(100) -) -ENGINE=hive -properties -( -"database" = "tmp", -"table" = "t1", -"hive.metastore.uris" = "thrift://0.0.0.0:8080" -); - -step 2: submit load command -LOAD LABEL db1.label1 -( - DATA FROM TABLE hive_t1 - INTO TABLE tbl1 - (k1,k2,k3) - SET - ( - uuid=binary_bitmap(uuid) - ) -) -WITH RESOURCE 'spark0' -( - "spark.executor.memory" = "2g", - "spark.shuffle.compress" = "true" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -You can view the details syntax about creating load by input `help spark load`. This paper mainly introduces the parameter meaning and precautions in the creation and load syntax of spark load. - -#### Label - -Identification of the import task. Each import task has a unique label within a single database. The specific rules are consistent with `broker load`. - -#### Data description parameters - -Currently, the supported data sources are CSV and hive table. Other rules are consistent with `broker load`. - -#### Load job parameters - -Load job parameters mainly refer to the `opt_properties` in the spark load. Load job parameters are applied to the entire load job. The rules are consistent with `broker load`. - -#### Spark resource parameters - -Spark resources need to be configured into the Doris system in advance, and users should be given `USAGE_PRIV`. Spark load can only be used after priv permission. - -When users have temporary requirements, such as adding resources for tasks and modifying spark configs, you can set them here. The settings only take effect for this task and do not affect the existing configuration in the Doris cluster. - -```sql -WITH RESOURCE 'spark0' -( - "spark.driver.memory" = "1g", - "spark.executor.memory" = "3g" -) -``` - -#### Load when data source is hive table - -At present, if you want to use hive table as a data source in the import process, you need to create an external table of type hive, - -Then you can specify the table name of the external table when submitting the Load command. - -#### Load process to build global dictionary - -The data type applicable to the aggregate columns of the Doris table is of type bitmap. 
- -In the load command, you can specify the field to build a global dictionary. The format is: '```doris field name=bitmap_dict(hive_table field name)``` - -It should be noted that the construction of global dictionary is supported only when the upstream data source is hive table. - -#### Load when data source is hive binary type table - -The data type applicable to the aggregate column of the doris table is bitmap type, and the data type of the corresponding column in the hive table of the data source is binary (through the org.apache.doris.load.loadv2.dpp.BitmapValue (FE spark-dpp) class serialized) type. - -There is no need to build a global dictionary, just specify the corresponding field in the load command, the format is: ```doris field name=binary_bitmap (hive table field name)``` - -Similarly, the binary (bitmap) type of data import is currently only supported when the upstream data source is a hive table. - -### Show load - -Spark load is asynchronous just like broker load, so the user must create the load label record and use label in the **show load command to view the load result**. The show load command is common in all load types. The specific syntax can be viewed by executing help show load. - -Example: - -``` -mysql> show load order by createtime desc limit 1\G -*************************** 1. row *************************** - JobId: 76391 - Label: label1 - State: FINISHED - Progress: ETL:100%; LOAD:100% - Type: SPARK - EtlInfo: unselected.rows=4; dpp.abnorm.ALL=15; dpp.norm.ALL=28133376 - TaskInfo: cluster:cluster0; timeout(s):10800; max_filter_ratio:5.0E-5 - ErrorMsg: N/A - CreateTime: 2019-07-27 11:46:42 - EtlStartTime: 2019-07-27 11:46:44 - EtlFinishTime: 2019-07-27 11:49:44 - LoadStartTime: 2019-07-27 11:49:44 -LoadFinishTime: 2019-07-27 11:50:16 - URL: http://1.1.1.1:8089/proxy/application_1586619723848_0035/ - JobDetails: {"ScannedRows":28133395,"TaskNumber":1,"FileNumber":1,"FileSize":200000} -``` - -Refer to broker load for the meaning of parameters in the returned result set. The differences are as follows: - -+ State - -The current phase of the load job. After the job is submitted, the status is pending. After the spark ETL is submitted, the status changes to ETL. After ETL is completed, Fe schedules be to execute push operation, and the status changes to finished after the push is completed and the version takes effect. - -There are two final stages of the load job: cancelled and finished. When the load job is in these two stages, the load is completed. Among them, cancelled is load failure, finished is load success. - -+ Progress - -Progress description of the load job. There are two kinds of progress: ETL and load, corresponding to the two stages of the load process, ETL and loading. - -The progress range of load is 0 ~ 100%. - -```Load progress = the number of tables that have completed all replica imports / the total number of tables in this import task * 100%``` - -**If all load tables are loaded, the progress of load is 99%**, the load enters the final effective stage. After the whole load is completed, the load progress will be changed to 100%. - -The load progress is not linear. Therefore, if the progress does not change over a period of time, it does not mean that the load is not in execution. - -+ Type - -Type of load job. Spark load is spark. 
+ CreateTime/EtlStartTime/EtlFinishTime/LoadStartTime/LoadFinishTime

These values represent the creation time of the load job, the start and completion times of the ETL phase, the start time of the loading phase, and the completion time of the entire load job.

+ JobDetails

Displays the detailed running status of the job and is updated when the ETL phase ends. It includes the number of loaded files, the total size (in bytes), the number of subtasks, the number of processed raw rows, and so on.

```{"ScannedRows":139264,"TaskNumber":1,"FileNumber":1,"FileSize":940754064}```

+ URL

Copy this URL into a browser to jump to the web interface of the corresponding application.

### View spark launcher commit log

Sometimes users need to view the detailed logs generated during spark submission. By default the logs are saved in `log/spark_launcher_log` under the FE root directory and are named `spark_launcher_{load_job_id}_{label}.log`. They are kept in this directory for a period of time; when the load information in FE metadata is cleaned up, the corresponding log is cleaned as well. The default retention time is 3 days.

### Cancel load

While a spark load job is not in the CANCELLED or FINISHED state, it can be cancelled manually by the user. When cancelling, the label of the load job must be specified. The syntax of the cancel load command can be viewed by executing `help cancel load`.

## Related system configuration

### FE configuration

The following configurations are system-level configurations of spark load, i.e. they apply to all spark load jobs. They are adjusted mainly by modifying `fe.conf`.

+ `enable_spark_load`

Enables spark load and resource creation. The default value is false, i.e. the feature is turned off.

+ `spark_load_default_timeout_second`

The default task timeout, 259200 seconds (3 days).

+ `spark_home_default_dir`

The spark client path (`fe/lib/spark2x`).

+ `spark_resource_path`

The path of the packaged spark dependency file (empty by default).

+ `spark_launcher_log_dir`

The directory where the spark client's commit logs are stored (`fe/log/spark_launcher_log`).

+ `yarn_client_path`

The path of the yarn binary executable (`fe/lib/yarn-client/hadoop/bin/yarn`).

+ `yarn_config_dir`

The path where the yarn configuration files are generated (`fe/lib/yarn-config`).

## Best practices

### Application scenarios

The most suitable scenario for spark load is that the raw data lives in a file system (HDFS) and the data volume ranges from tens of GB to TB. Stream load or broker load is recommended for smaller amounts of data.

## FAQ

* When using spark load, the `HADOOP_CONF_DIR` environment variable is not set in `spark-env.sh`.

If the `HADOOP_CONF_DIR` environment variable is not set, the error `When running with master 'yarn' either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment` will be reported.

* When using spark load, `spark_home_default_dir` is not specified correctly.

The spark-submit command is used when submitting a spark job. If `spark_home_default_dir` is set incorrectly, an error `Cannot run program 'xxx/bin/spark_submit', error = 2, no such file or directory` will be reported.

* When using spark load, `spark_resource_path` does not point to the packaged zip file.

If `spark_resource_path` is not set correctly,
An error `file XXX/jars/spark-2x.zip` does not exist will be reported. - -* When using spark load `yarn_client_path` does not point to a executable file of yarn. - -If `yarn_client_path` is not set correctly. An error `yarn client does not exist in path: XXX/yarn-client/hadoop/bin/yarn` will be reported. diff --git a/docs/en/administrator-guide/load-data/stream-load-manual.md b/docs/en/administrator-guide/load-data/stream-load-manual.md deleted file mode 100644 index 83303c6f35..0000000000 --- a/docs/en/administrator-guide/load-data/stream-load-manual.md +++ /dev/null @@ -1,374 +0,0 @@ ---- -{ - "title": "Stream load", - "language": "en" -} ---- - - - -# Stream load - -Stream load is a synchronous way of importing. Users import local files or data streams into Doris by sending HTTP protocol requests. Stream load synchronously executes the import and returns the import result. Users can directly determine whether the import is successful by the return body of the request. - -Stream load is mainly suitable for importing local files or data from data streams through procedures. - -## Basic Principles - -The following figure shows the main flow of Stream load, omitting some import details. - -``` - ^ + - | | - | | 1A. User submit load to FE - | | - | +--v-----------+ - | | FE | -5. Return result to user | +--+-----------+ - | | - | | 2. Redirect to BE - | | - | +--v-----------+ - +---+Coordinator BE| 1B. User submit load to BE - +-+-----+----+-+ - | | | - +-----+ | +-----+ - | | | 3. Distrbute data - | | | - +-v-+ +-v-+ +-v-+ - |BE | |BE | |BE | - +---+ +---+ +---+ -``` - -In Stream load, Doris selects a node as the Coordinator node. This node is responsible for receiving data and distributing data to other data nodes. - -Users submit import commands through HTTP protocol. If submitted to FE, FE forwards the request to a BE via the HTTP redirect instruction. Users can also submit import commands directly to a specified BE. - -The final result of the import is returned to the user by Coordinator BE. - -## Support data format - -Currently Stream Load supports two data formats: CSV (text) and JSON - -## Basic operations -### Create a Load - -Stream load submits and transfers data through HTTP protocol. Here, the `curl` command shows how to submit an import. - -Users can also operate through other HTTP clients. - -``` -curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load - -The properties supported in the header are described in "Load Parameters" below -The format is: - H "key1: value1" -``` - -Examples: - -``` -curl --location-trusted -u root -T date -H "label:123" http://abc.com:8030/api/test/date/_stream_load -``` -The detailed syntax for creating imports helps to execute ``HELP STREAM LOAD`` view. The following section focuses on the significance of creating some parameters of Stream load. - -#### Signature parameters - -+ user/passwd - - Stream load uses the HTTP protocol to create the imported protocol and signs it through the Basic Access authentication. The Doris system verifies user identity and import permissions based on signatures. - -#### Load Parameters - -Stream load uses HTTP protocol, so all parameters related to import tasks are set in the header. The significance of some parameters of the import task parameters of Stream load is mainly introduced below. - -+ label - - Identity of import task. Each import task has a unique label inside a single database. Label is a user-defined name in the import command. 
With this label, users can view the execution of the corresponding import task. - - Another function of label is to prevent users from importing the same data repeatedly. **It is strongly recommended that users use the same label for the same batch of data. This way, repeated requests for the same batch of data will only be accepted once, guaranteeing at-Most-Once** - - When the corresponding import operation state of label is CANCELLED, the label can be used again. - - -+ column_separator - - Used to specify the column separator in the load file. The default is `\t`. If it is an invisible character, you need to add `\x` as a prefix and hexadecimal to indicate the separator. - - For example, the separator `\x01` of the hive file needs to be specified as `-H "column_separator:\x01"`. - - You can use a combination of multiple characters as the column separator. - -+ line_delimiter - - Used to specify the line delimiter in the load file. The default is `\n`. - - You can use a combination of multiple characters as the column separator. - -+ max\_filter\_ratio - - The maximum tolerance rate of the import task is 0 by default, and the range of values is 0-1. When the import error rate exceeds this value, the import fails. - - If the user wishes to ignore the wrong row, the import can be successful by setting this parameter greater than 0. - - The calculation formula is as follows: - - ``` (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) > max_filter_ratio ``` - - ``` dpp.abnorm.ALL``` denotes the number of rows whose data quality is not up to standard. Such as type mismatch, column mismatch, length mismatch and so on. - - ``` dpp.norm.ALL ``` refers to the number of correct data in the import process. The correct amount of data for the import task can be queried by the `SHOW LOAD` command. - -The number of rows in the original file = `dpp.abnorm.ALL + dpp.norm.ALL` - -+ where - - Import the filter conditions specified by the task. Stream load supports filtering of where statements specified for raw data. The filtered data will not be imported or participated in the calculation of filter ratio, but will be counted as `num_rows_unselected`. - -+ partition - - Partition information for tables to be imported will not be imported if the data to be imported does not belong to the specified Partition. These data will be included in `dpp.abnorm.ALL`. - -+ columns - - The function transformation configuration of data to be imported includes the sequence change of columns and the expression transformation, in which the expression transformation method is consistent with the query statement. - - ``` - Examples of column order transformation: There are three columns of original data (src_c1,src_c2,src_c3), and there are also three columns (dst_c1,dst_c2,dst_c3) in the doris table at present. 
    When the first column src_c1 of the original file corresponds to the dst_c1 column of the target table, the second column src_c2 to the dst_c2 column, and the third column src_c3 to the dst_c3 column, it is written as follows:
    columns: dst_c1, dst_c2, dst_c3

    When the first column src_c1 of the original file corresponds to the dst_c2 column of the target table, the second column src_c2 to the dst_c3 column, and the third column src_c3 to the dst_c1 column, it is written as follows:
    columns: dst_c2, dst_c3, dst_c1

    Example of expression transformation: there are two columns in the original file and two columns in the target table (c1, c2), but both source columns need to be transformed by functions to correspond to the two target columns:
    columns: tmp_c1, tmp_c2, c1 = year(tmp_c1), c2 = month(tmp_c2)
    tmp_* is a placeholder representing the two original columns in the source file.
    ```

+ exec\_mem\_limit

    Memory limit. The default is 2GB, in bytes.

+ merge\_type

    The type of data merging. Three types are supported: APPEND, DELETE, and MERGE. APPEND is the default and means that this batch of data is appended to the existing data. DELETE means that all rows with the same keys as this batch of data are deleted. MERGE semantics must be used together with a delete condition: data that meets the delete condition is processed according to DELETE semantics, and the rest according to APPEND semantics.

+ two\_phase\_commit

    Stream load supports a two-phase commit mode. It can be enabled by declaring ```two_phase_commit=true``` in the HTTP header; the mode is disabled by default.
    Two-phase commit means that during Stream load, a response is returned to the client once the data has been written, but the data is not yet visible and the transaction status is PRECOMMITTED. The data becomes visible only after the client triggers a COMMIT.

    1. The user can invoke the following interface to trigger the commit operation for the transaction:
    ```
    curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:commit" http://fe_host:http_port/api/{db}/_stream_load_2pc
    ```
    or
    ```
    curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:commit" http://be_host:webserver_port/api/{db}/_stream_load_2pc
    ```

    2. The user can invoke the following interface to trigger the abort operation for the transaction:
    ```
    curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:abort" http://fe_host:http_port/api/{db}/_stream_load_2pc
    ```
    or
    ```
    curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:abort" http://be_host:webserver_port/api/{db}/_stream_load_2pc
    ```

### Return results

Since Stream load is a synchronous import method, the import result is returned to the user directly in the response to the import request.
- -Examples: - -``` -{ - "TxnId": 1003, - "Label": "b6f3bc78-0d2c-45d9-9e4c-faa0a0149bee", - "Status": "Success", - "ExistingJobStatus": "FINISHED", // optional - "Message": "OK", - "NumberTotalRows": 1000000, - "NumberLoadedRows": 1000000, - "NumberFilteredRows": 1, - "NumberUnselectedRows": 0, - "LoadBytes": 40888898, - "LoadTimeMs": 2144, - "BeginTxnTimeMs": 1, - "StreamLoadPutTimeMs": 2, - "ReadDataTimeMs": 325, - "WriteDataTimeMs": 1933, - "CommitAndPublishTimeMs": 106, - "ErrorURL": "http://192.168.1.1:8042/api/_load_error_log?file=__shard_0/error_log_insert_stmt_db18266d4d9b4ee5-abb00ddd64bdf005_db18266d4d9b4ee5_abb00ddd64bdf005" -} -``` - -The following main explanations are given for the Stream load import result parameters: - -+ TxnId: The imported transaction ID. Users do not perceive. - -+ Label: Import Label. User specified or automatically generated by the system. - -+ Status: Import completion status. - - "Success": Indicates successful import. - - "Publish Timeout": This state also indicates that the import has been completed, except that the data may be delayed and visible without retrying. - - "Label Already Exists": Label duplicate, need to be replaced Label. - - "Fail": Import failed. - -+ ExistingJobStatus: The state of the load job corresponding to the existing Label. - - This field is displayed only when the status is "Label Already Exists". The user can know the status of the load job corresponding to Label through this state. "RUNNING" means that the job is still executing, and "FINISHED" means that the job is successful. - -+ Message: Import error messages. - -+ NumberTotalRows: Number of rows imported for total processing. - -+ NumberLoadedRows: Number of rows successfully imported. - -+ NumberFilteredRows: Number of rows that do not qualify for data quality. - -+ NumberUnselectedRows: Number of rows filtered by where condition. - -+ LoadBytes: Number of bytes imported. - -+ LoadTimeMs: Import completion time. Unit milliseconds. - -+ BeginTxnTimeMs: The time cost for RPC to Fe to begin a transaction, Unit milliseconds. - -+ StreamLoadPutTimeMs: The time cost for RPC to Fe to get a stream load plan, Unit milliseconds. - -+ ReadDataTimeMs: Read data time, Unit milliseconds. - -+ WriteDataTimeMs: Write data time, Unit milliseconds. - -+ CommitAndPublishTimeMs: The time cost for RPC to Fe to commit and publish a transaction, Unit milliseconds. - -+ ErrorURL: If you have data quality problems, visit this URL to see specific error lines. - -> Note: Since Stream load is a synchronous import mode, import information will not be recorded in Doris system. Users cannot see Stream load asynchronously by looking at import commands. You need to listen for the return value of the create import request to get the import result. - -### Cancel Load - -Users can't cancel Stream load manually. Stream load will be cancelled automatically by the system after a timeout or import error. - -## Relevant System Configuration - -### FE configuration - -+ stream\_load\_default\_timeout\_second - - The timeout time of the import task (in seconds) will be cancelled by the system if the import task is not completed within the set timeout time, and will become CANCELLED. - - At present, Stream load does not support custom import timeout time. All Stream load import timeout time is uniform. The default timeout time is 300 seconds. If the imported source file can no longer complete the import within the specified time, the FE parameter ```stream_load_default_timeout_second``` needs to be adjusted. 
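As a sketch, if imports routinely need about 20 minutes, the default could be raised in `fe.conf` (the value below is purely illustrative; the FE must pick up the new configuration before it takes effect):

```
# Illustrative value only: a 20-minute default timeout for stream load jobs
stream_load_default_timeout_second = 1200
```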
### BE configuration

+ streaming\_load\_max\_mb

    The maximum import size of Stream load, 10G by default, in MB. If the user's original file exceeds this value, the BE parameter ```streaming_load_max_mb``` needs to be adjusted.

## Best Practices

### Application scenarios

The most appropriate scenario for using Stream load is when the original file is in memory or on disk. Also, since Stream load is a synchronous import method, it is the right choice when the import result needs to be obtained synchronously.

### Data volume

Since Stream load relies on a single BE to receive and distribute the data, the recommended amount of imported data is between 1G and 10G. Because the default maximum Stream load import size is 10G, the BE configuration ```streaming_load_max_mb``` needs to be modified if files larger than 10G are to be imported.

```
For example, the size of the file to be imported is 15G
Modify the BE configuration streaming_load_max_mb to 16000
```

The default Stream load timeout is 300 seconds. Given Doris's current maximum import speed, files larger than roughly 3G require adjusting the default import task timeout.

```
Import task timeout = import data volume / 10M/s (the actual average import speed must be estimated by the user based on the cluster)
For example, to import a 10G file:
Timeout = 1000s, i.e. 10G / 10M/s
```

### Complete examples

Data situation: the data to be imported is in the local disk path /home/store_sales on the machine that sends the import request, it is about 15G in size, and it should be imported into the table store\_sales of the database bj_sales.

Cluster situation: the concurrency of Stream load is not affected by cluster size.

+ Step 1: Does the import file size exceed the default maximum import size of 10G?

    ```
    BE conf
    streaming_load_max_mb = 16000
    ```
+ Step 2: Does the approximate import time exceed the default timeout?

    ```
    Import time 15000/10 = 1500s
    This exceeds the default timeout, so the FE configuration needs to be modified:
    stream_load_default_timeout_second = 1500
    ```

+ Step 3: Create the import task

    ```
    curl --location-trusted -u user:password -T /home/store_sales -H "label:abc" http://abc.com:8000/api/bj_sales/store_sales/_stream_load
    ```

## Common Questions

* Label Already Exists

    The steps to troubleshoot a duplicate Label in Stream load are as follows:

    1. Does the Label conflict with an import created by another import method?

    Because Labels in the Doris system do not distinguish between import methods, another import method may already be using the same Label.

    Run ```SHOW LOAD WHERE LABEL = "xxx"```, where xxx is the duplicate Label string, to check whether a FINISHED import with the same Label already exists.

    2. Was the same Stream load job submitted repeatedly?

    Since Stream load creates import tasks via the HTTP protocol, HTTP clients in various languages usually have their own request retry logic. After receiving the first request, the Doris system has already started processing the Stream load, but because the result is not returned to the client in time, the client retries the request. At that point the Doris system is already handling the first request, so the second request is reported as Label Already Exists.
- - To sort out the possible methods mentioned above: Search FE Master's log with Label to see if there are two ``redirect load action to destination = ``redirect load action to destination cases in the same Label. If so, the request is submitted repeatedly by the Client side. - - It is recommended that the user calculate the approximate import time based on the amount of data currently requested, and change the request overtime on the client side to a value greater than the import timeout time according to the import timeout time to avoid multiple submissions of the request by the client side. - - 3. Connection reset abnormal - - In the community version 0.14.0 and earlier versions, the connection reset exception occurred after Http V2 was enabled, because the built-in web container is tomcat, and Tomcat has pits in 307 (Temporary Redirect). There is a problem with the implementation of this protocol. All In the case of using Stream load to import a large amount of data, a connect reset exception will occur. This is because tomcat started data transmission before the 307 jump, which resulted in the lack of authentication information when the BE received the data request. Later, changing the built-in container to Jetty solved this problem. If you encounter this problem, please upgrade your Doris or disable Http V2 (`enable_http_server_v2=false`). - - After the upgrade, also upgrade the http client version of your program to `4.5.13`,Introduce the following dependencies in your pom.xml file - - ```xml - - org.apache.httpcomponents - httpclient - 4.5.13 - - ``` - - - diff --git a/docs/en/administrator-guide/materialized_view.md b/docs/en/administrator-guide/materialized_view.md deleted file mode 100644 index 243fcdfeae..0000000000 --- a/docs/en/administrator-guide/materialized_view.md +++ /dev/null @@ -1,486 +0,0 @@ ---- -{ - "title": "Materialized view", - "language": "en" -} ---- - - - -# Materialized view -A materialized view is a data set that is pre-calculated (according to a defined SELECT statement) and stored in a special table in Doris. - -The emergence of materialized views is mainly to satisfy users. It can analyze any dimension of the original detailed data, but also can quickly analyze and query fixed dimensions. - -## When to use materialized view - -+ Analyze requirements to cover both detailed data query and fixed-dimensional query. -+ The query only involves a small part of the columns or rows in the table. -+ The query contains some time-consuming processing operations, such as long-time aggregation operations. -+ The query needs to match different prefix indexes. - -## Advantage - -+ For those queries that frequently use the same sub-query results repeatedly, the performance is greatly improved -+ Doris automatically maintains the data of the materialized view, whether it is a new import or delete operation, it can ensure the data consistency of the base table and the materialized view table. No need for any additional labor maintenance costs. -+ When querying, it will automatically match the optimal materialized view and read data directly from the materialized view. - -*Automatic maintenance of materialized view data will cause some maintenance overhead, which will be explained in the limitations of materialized views later.* - -## Materialized View VS Rollup - -Before the materialized view function, users generally used the Rollup function to improve query efficiency through pre-aggregation. However, Rollup has certain limitations. 
It cannot do pre-aggregation based on the detailed model. - -Materialized views cover the functions of Rollup while also supporting richer aggregate functions. So the materialized view is actually a superset of Rollup. - -In other words, the functions previously supported by the `ALTER TABLE ADD ROLLUP` syntax can now be implemented by `CREATE MATERIALIZED VIEW`. - -## Use materialized views - -The Doris system provides a complete set of DDL syntax for materialized views, including creating, viewing, and deleting. The syntax of DDL is consistent with PostgreSQL and Oracle. - -### Create a materialized view - -Here you must first decide what kind of materialized view to create based on the characteristics of your query statement. This is not to say that your materialized view definition is exactly the same as one of your query statements. There are two principles here: - -1. **Abstract** from the query statement, the grouping and aggregation methods shared by multiple queries are used as the definition of the materialized view. -2. It is not necessary to create materialized views for all dimension combinations. - -First of all, the first point, if a materialized view is abstracted, and multiple queries can be matched to this materialized view. This materialized view works best. Because the maintenance of the materialized view itself also consumes resources. - -If the materialized view only fits a particular query, and other queries do not use this materialized view. As a result, the materialized view is not cost-effective, which not only occupies the storage resources of the cluster, but cannot serve more queries. - -Therefore, users need to combine their own query statements and data dimension information to abstract the definition of some materialized views. - -The second point is that in the actual analysis query, not all dimensional analysis will be covered. Therefore, it is enough to create a materialized view for the commonly used combination of dimensions, so as to achieve a space and time balance. - -Creating a materialized view is an asynchronous operation, which means that after the user successfully submits the creation task, Doris will calculate the existing data in the background until the creation is successful. - -The specific syntax can be viewed through the following command: - -``` -HELP CREATE MATERIALIZED VIEW -``` - -### Support aggregate functions - -The aggregate functions currently supported by the materialized view function are: - -+ SUM, MIN, MAX (Version 0.12) -+ COUNT, BITMAP\_UNION, HLL\_UNION (Version 0.13) - -+ The form of BITMAP\_UNION must be: `BITMAP_UNION(TO_BITMAP(COLUMN))` The column type can only be an integer (largeint also does not support), or `BITMAP_UNION(COLUMN)` and the base table is an AGG model. -+ The form of HLL\_UNION must be: `HLL_UNION(HLL_HASH(COLUMN))` The column type cannot be DECIMAL, or `HLL_UNION(COLUMN)` and the base table is an AGG model. - -### Update strategy - -In order to ensure the data consistency between the materialized view table and the Base table, Doris will import, delete and other operations on the Base table are synchronized to the materialized view table. And through incremental update to improve update efficiency. To ensure atomicity through transaction. - -For example, if the user inserts data into the base table through the INSERT command, this data will be inserted into the materialized view synchronously. 
When both the base table and the materialized view table are written successfully, the INSERT command will return successfully. - -### Query automatic matching - -After the materialized view is successfully created, the user's query does not need to be changed, that is, it is still the base table of the query. Doris will automatically select an optimal materialized view based on the current query statement, read data from the materialized view and calculate it. - -Users can use the EXPLAIN command to check whether the current query uses a materialized view. - -The matching relationship between the aggregation in the materialized view and the aggregation in the query: - -| Materialized View Aggregation | Query Aggregation | -| ---------- | -------- | -| sum | sum | -| min | min | -| max | max | -| count | count | -| bitmap\_union | bitmap\_union, bitmap\_union\_count, count(distinct) | -| hll\_union | hll\_raw\_agg, hll\_union\_agg, ndv, approx\_count\_distinct | - -After the aggregation functions of bitmap and hll match the materialized view in the query, the aggregation operator of the query will be rewritten according to the table structure of the materialized view. See example 2 for details. - -### Query materialized views - -Check what materialized views the current table has, and what their table structure is. Through the following command: - -``` -MySQL [test]> desc mv_test all; -+-----------+---------------+-----------------+----------+------+-------+---------+--------------+ -| IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra | -+-----------+---------------+-----------------+----------+------+-------+---------+--------------+ -| mv_test | DUP_KEYS | k1 | INT | Yes | true | NULL | | -| | | k2 | BIGINT | Yes | true | NULL | | -| | | k3 | LARGEINT | Yes | true | NULL | | -| | | k4 | SMALLINT | Yes | false | NULL | NONE | -| | | | | | | | | -| mv_2 | AGG_KEYS | k2 | BIGINT | Yes | true | NULL | | -| | | k4 | SMALLINT | Yes | false | NULL | MIN | -| | | k1 | INT | Yes | false | NULL | MAX | -| | | | | | | | | -| mv_3 | AGG_KEYS | k1 | INT | Yes | true | NULL | | -| | | to_bitmap(`k2`) | BITMAP | No | false | | BITMAP_UNION | -| | | | | | | | | -| mv_1 | AGG_KEYS | k4 | SMALLINT | Yes | true | NULL | | -| | | k1 | BIGINT | Yes | false | NULL | SUM | -| | | k3 | LARGEINT | Yes | false | NULL | SUM | -| | | k2 | BIGINT | Yes | false | NULL | MIN | -+-----------+---------------+-----------------+----------+------+-------+---------+--------------+ -``` - -You can see that the current `mv_test` table has three materialized views: mv\_1, mv\_2 and mv\_3, and their table structure. - -### Delete materialized view - -If the user no longer needs the materialized view, you can delete the materialized view by 'DROP' commen. - -The specific syntax can be viewed through the following command: - -``` -HELP DROP MATERIALIZED VIEW -``` - -## Best Practice 1 - -The use of materialized views is generally divided into the following steps: - -1. Create a materialized view -2. Asynchronously check whether the materialized view has been constructed -3. Query and automatically match materialized views - -**First is the first step: Create a materialized view** - -Assume that the user has a sales record list, which stores the transaction id, salesperson, sales store, sales time, and amount of each transaction. 
The table building statement is: - -``` -create table sales_records(record_id int, seller_id int, store_id int, sale_date date, sale_amt bigint) distributed by hash(record_id) properties("replication_num" = "1"); -``` -The table structure of this `sales_records` is as follows: - -``` -MySQL [test]> desc sales_records; -+-----------+--------+------+-------+---------+--- ----+ -| Field | Type | Null | Key | Default | Extra | -+-----------+--------+------+-------+---------+--- ----+ -| record_id | INT | Yes | true | NULL | | -| seller_id | INT | Yes | true | NULL | | -| store_id | INT | Yes | true | NULL | | -| sale_date | DATE | Yes | false | NULL | NONE | -| sale_amt | BIGINT | Yes | false | NULL | NONE | -+-----------+--------+------+-------+---------+--- ----+ -``` - -At this time, if the user often performs an analysis query on the sales volume of different stores, you can create a materialized view for the `sales_records` table to group the sales stores and sum the sales of the same sales stores. The creation statement is as follows: - -``` -MySQL [test]> create materialized view store_amt as select store_id, sum(sale_amt) from sales_records group by store_id; -``` - -The backend returns to the following figure, indicating that the task of creating a materialized view is submitted successfully. - -``` -Query OK, 0 rows affected (0.012 sec) -``` - -**Step 2: Check whether the materialized view has been built** - -Since the creation of a materialized view is an asynchronous operation, after the user submits the task of creating a materialized view, he needs to asynchronously check whether the materialized view has been constructed through a command. The command is as follows: - -``` -SHOW ALTER TABLE ROLLUP FROM db_name; (Version 0.12) -SHOW ALTER TABLE MATERIALIZED VIEW FROM db_name; (Version 0.13) -``` - -In this command, `db_name` is a parameter, you need to replace it with your real db name. The result of the command is to display all the tasks of creating a materialized view of this db. The results are as follows: - -``` -+-------+---------------+---------------------+--- ------------------+---------------+--------------- --+----------+---------------+-----------+-------- -------------------------------------------------- -------------------------------------------------- -------------+----------+---------+ -| JobId | TableName | CreateTime | FinishedTime | BaseIndexName | RollupIndexName | RollupId | TransactionId | State | Msg | Progress | Timeout | -+-------+---------------+---------------------+--- ------------------+---------------+--------------- --+----------+---------------+-----------+-------- -------------------------------------------------- -------------------------------------------------- -------------+----------+---------+ -| 22036 | sales_records | 2020-07-30 20:04:28 | 2020-07-30 20:04:57 | sales_records | store_amt | 22037 | 5008 | FINISHED | | NULL | 86400 | -+-------+---------------+---------------------+--- ------------------+---------------+--------------- --+----------+---------------+-----------+-------- ---------------------------------------- - -``` - -Among them, TableName refers to which table the data of the materialized view comes from, and RollupIndexName refers to the name of the materialized view. One of the more important indicators is State. - -When the State of the task of creating a materialized view has become FINISHED, it means that the materialized view has been created successfully. 
This means that it is possible to automatically match this materialized view when querying. - -**Step 3: Query** - -After the materialized view is created, when users query the sales volume of different stores, they will directly read the aggregated data from the materialized view `store_amt` just created. To achieve the effect of improving query efficiency. - -The user's query still specifies the query `sales_records` table, for example: - -``` -SELECT store_id, sum(sale_amt) FROM sales_records GROUP BY store_id; -``` - -The above query will automatically match `store_amt`. The user can use the following command to check whether the current query matches the appropriate materialized view. - -``` -EXPLAIN SELECT store_id, sum(sale_amt) FROM sales_records GROUP BY store_id; -+-----------------------------------------------------------------------------+ -| Explain String | -+-----------------------------------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS: `store_id` | sum(`sale_amt`) | -| PARTITION: UNPARTITIONED | -| | -| RESULT SINK | -| | -| 4:EXCHANGE | -| | -| PLAN FRAGMENT 1 | -| OUTPUT EXPRS: | -| PARTITION: HASH_PARTITIONED: `store_id` | -| | -| STREAM DATA SINK | -| EXCHANGE ID: 04 | -| UNPARTITIONED | -| | -| 3:AGGREGATE (merge finalize) | -| | output: sum( sum(`sale_amt`)) | -| | group by: `store_id` | -| | | -| 2:EXCHANGE | -| | -| PLAN FRAGMENT 2 | -| OUTPUT EXPRS: | -| PARTITION: RANDOM | -| | -| STREAM DATA SINK | -| EXCHANGE ID: 02 | -| HASH_PARTITIONED: `store_id` | -| | -| 1:AGGREGATE (update serialize) | -| | STREAMING | -| | output: sum(`sale_amt`) | -| | group by: `store_id` | -| | | -| 0:OlapScanNode | -| TABLE: sales_records | -| PREAGGREGATION: ON | -| partitions=1/1 | -| rollup: store_amt | -| tabletRatio=10/10 | -| tabletList=22038,22040,22042,22044,22046,22048,22050,22052,22054,22056 | -| cardinality=0 | -| avgRowSize=0.0 | -| numNodes=1 | -+-----------------------------------------------------------------------------+ -45 rows in set (0.006 sec) -``` -The final thing is the rollup attribute in OlapScanNode. You can see that the rollup of the current query shows `store_amt`. That is to say, the query has been correctly matched to the materialized view `store_amt`, and data is read directly from the materialized view. - -## Best Practice 2 PV,UV - -Business scenario: Calculate the UV and PV of advertising - -Assuming that the user's original ad click data is stored in Doris, then for ad PV and UV queries, the query speed can be improved by creating a materialized view of `bitmap_union`. - -Use the following statement to first create a table that stores the details of the advertisement click data, including the click event of each click, what advertisement was clicked, what channel clicked, and who was the user who clicked. 
- -``` -MySQL [test]> create table advertiser_view_record(time date, advertiser varchar(10), channel varchar(10), user_id int) distributed by hash(time) properties("replication_num" = "1"); -Query OK, 0 rows affected (0.014 sec) -``` -The original ad click data table structure is: - -``` -MySQL [test]> desc advertiser_view_record; -+------------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+------------+-------------+------+-------+---------+-------+ -| time | DATE | Yes | true | NULL | | -| advertiser | VARCHAR(10) | Yes | true | NULL | | -| channel | VARCHAR(10) | Yes | false | NULL | NONE | -| user_id | INT | Yes | false | NULL | NONE | -+------------+-------------+------+-------+---------+-------+ -4 rows in set (0.001 sec) -``` - -1. Create a materialized view - - Since the user wants to query the UV of the advertisements, that is, to precisely deduplicate the users of the same advertisement, the user's query is generally: - - ``` - SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel; - ``` - - For this kind of UV calculation scenario, we can create a materialized view with `bitmap_union` to achieve precise deduplication in advance. - - In Doris, the result of `count(distinct)` aggregation is exactly the same as the result of `bitmap_union_count` aggregation, and `bitmap_union_count` equals the count of the `bitmap_union` result. So **if the query involves `count(distinct)`, you can speed up the query by creating a materialized view with `bitmap_union` aggregation.** - - For this case, you can create a materialized view that precisely deduplicates `user_id`, grouped by advertiser and channel. - - ``` - MySQL [test]> create materialized view advertiser_uv as select advertiser, channel, bitmap_union(to_bitmap(user_id)) from advertiser_view_record group by advertiser, channel; - Query OK, 0 rows affected (0.012 sec) - ``` - - *Note: Because `user_id` itself is an INT column, it cannot be aggregated by `bitmap_union` directly in Doris. The field first needs to be converted to the bitmap type through the function `to_bitmap`, and then it can be aggregated by `bitmap_union`.* - - After the creation is complete, the table structures of the advertisement click detail table and the materialized view table are as follows: - - ``` - MySQL [test]> desc advertiser_view_record all; - +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+ - | IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra | - +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+ - | advertiser_view_record | DUP_KEYS | time | DATE | Yes | true | NULL | | - | | | advertiser | VARCHAR(10) | Yes | true | NULL | | - | | | channel | VARCHAR(10) | Yes | false | NULL | NONE | - | | | user_id | INT | Yes | false | NULL | NONE | - | | | | | | | | | - | advertiser_uv | AGG_KEYS | advertiser | VARCHAR(10) | Yes | true | NULL | | - | | | channel | VARCHAR(10) | Yes | true | NULL | | - | | | to_bitmap(`user_id`) | BITMAP | No | false | | BITMAP_UNION | - +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+ - ``` - -2.
Automatic query matching - - When the materialized view table is created, when querying the advertisement UV, Doris will automatically query the data from the materialized view `advertiser_uv` just created. For example, the original query statement is as follows: - - ``` - SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel; - ``` - - After the materialized view is selected, the actual query will be transformed into: - - ``` - SELECT advertiser, channel, bitmap_union_count(to_bitmap(user_id)) FROM advertiser_uv GROUP BY advertiser, channel; - ``` - - Through the EXPLAIN command, you can check whether Doris matches the materialized view: - - ``` - MySQL [test]> explain SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel; - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Explain String | - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | PLAN FRAGMENT 0 | - | OUTPUT EXPRS: `advertiser` | `channel` | bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`) | - | PARTITION: UNPARTITIONED | - | | - | RESULT SINK | - | | - | 4:EXCHANGE | - | | - | PLAN FRAGMENT 1 | - | OUTPUT EXPRS: | - | PARTITION: HASH_PARTITIONED: `advertiser`, `channel` | - | | - | STREAM DATA SINK | - | EXCHANGE ID: 04 | - | UNPARTITIONED | - | | - | 3:AGGREGATE (merge finalize) | - | | output: bitmap_union_count( bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`)) | - | | group by: `advertiser`, `channel` | - | | | - | 2:EXCHANGE | - | | - | PLAN FRAGMENT 2 | - | OUTPUT EXPRS: | - | PARTITION: RANDOM | - | | - | STREAM DATA SINK | - | EXCHANGE ID: 02 | - | HASH_PARTITIONED: `advertiser`, `channel` | - | | - | 1:AGGREGATE (update serialize) | - | | STREAMING | - | | output: bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`) | - | | group by: `advertiser`, `channel` | - | | | - | 0:OlapScanNode | - | TABLE: advertiser_view_record | - | PREAGGREGATION: ON | - | partitions=1/1 | - | rollup: advertiser_uv | - | tabletRatio=10/10 | - | tabletList=22084,22086,22088,22090,22092,22094,22096,22098,22100,22102 | - | cardinality=0 | - | avgRowSize=0.0 | - | numNodes=1 | - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - 45 rows in set (0.030 sec) - ``` - - In the result of EXPLAIN, you can first see that the rollup attribute value of OlapScanNode is advertiser_uv. In other words, the query directly scans the data of the materialized view. The match is successful. - - Secondly, the calculation of `count(distinct)` for the `user_id` field is rewritten as `bitmap_union_count`. That is to achieve the effect of precise deduplication through bitmap. - - -## Best Practice 3 - -Business scenario: matching a richer prefix index - -The user's original table has three columns (k1, k2, k3). Among them, k1, k2 are prefix index columns. At this time, if the user query condition contains `where k1=a and k2=b`, the query can be accelerated through the index. - -But in some cases, the user's filter conditions cannot match the prefix index, such as `where k3=c`. 
Then the query speed cannot be improved through the index. - -This problem can be solved by creating a materialized view with k3 as the first column. - -1. Create a materialized view - - ``` - CREATE MATERIALIZED VIEW mv_1 as SELECT k3, k2, k1 FROM tableA ORDER BY k3; - ``` - - After the above statement completes, the complete detail data is retained in the materialized view, and the prefix index of the materialized view is the k3 column. The table structure is as follows: - - ``` - MySQL [test]> desc tableA all; - +-----------+---------------+-------+------+------+-------+---------+-------+ - | IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra | - +-----------+---------------+-------+------+------+-------+---------+-------+ - | tableA | DUP_KEYS | k1 | INT | Yes | true | NULL | | - | | | k2 | INT | Yes | true | NULL | | - | | | k3 | INT | Yes | true | NULL | | - | | | | | | | | | - | mv_1 | DUP_KEYS | k3 | INT | Yes | true | NULL | | - | | | k2 | INT | Yes | false | NULL | NONE | - | | | k1 | INT | Yes | false | NULL | NONE | - +-----------+---------------+-------+------+------+-------+---------+-------+ - ``` - -2. Query matching - - Now, if the user's filter condition contains the k3 column, for example: - - ``` - select k1, k2, k3 from tableA where k3=1; - ``` - - At this time, the query will read data directly from the mv_1 materialized view just created. The materialized view has a prefix index on k3, so query efficiency will also be improved. - - -## Limitations - -1. The parameter of an aggregate function in a materialized view only supports a single column; expressions are not supported. For example, sum(a+b) is not supported. -2. If the conditional column of a delete statement does not exist in the materialized view, the delete operation cannot be performed. If you must delete the data, you need to drop the materialized view before deleting the data. -3. Too many materialized views on a single table will affect import efficiency: when importing data, the materialized views and the base table data are updated synchronously. If a table has more than 10 materialized views, the import speed may become very slow. This is equivalent to a single import having to write 10 tables at the same time. -4. The same column with different aggregate functions cannot appear in a materialized view at the same time. For example, select sum(a), min(a) from table is not supported. -5. For the Unique Key data model, a materialized view can only change the column order and cannot play an aggregation role. Therefore, in the Unique Key model, it is not possible to perform coarse-grained aggregation on the data by creating a materialized view. - -## Error -1. DATA_QUALITY_ERR: "The data quality does not satisfy, please check your data" -The materialized view creation failed due to data quality issues. -Note: The bitmap type only supports positive integers.
If there are negative Numbers in the original data, the materialized view will fail to be created diff --git a/docs/en/administrator-guide/multi-tenant.md b/docs/en/administrator-guide/multi-tenant.md deleted file mode 100644 index 8c37afad23..0000000000 --- a/docs/en/administrator-guide/multi-tenant.md +++ /dev/null @@ -1,222 +0,0 @@ ---- -{ - "title": "Multi-tenancy", - "language": "en" -} ---- - - - -# Multi-tenancy - -The main purpose of Doris's multi-tenant and resource isolation solution is to reduce interference between multiple users when performing data operations in the same Doris cluster, and to allocate cluster resources to each user more reasonably. - -The scheme is mainly divided into two parts, one is the division of resource groups at the node level in the cluster, and the other is the resource limit for a single query. - -## Nodes in Doris - -First, let's briefly introduce the node composition of Doris. There are two types of nodes in a Doris cluster: Frontend (FE) and Backend (BE). - -FE is mainly responsible for metadata management, cluster management, user request access and query plan analysis. - -BE is mainly responsible for data storage and execution of query plans. - -FE does not participate in the processing and calculation of user data, so it is a node with low resource consumption. The BE is responsible for all data calculations and task processing, and is a resource-consuming node. Therefore, the resource division and resource restriction schemes introduced in this article are all aimed at BE nodes. Because the FE node consumes relatively low resources and can also be scaled horizontally, there is usually no need to isolate and restrict resources, and the FE node can be shared by all users. - -## Node resource division - -Node resource division refers to setting tags for BE nodes in a Doris cluster, and the BE nodes with the same tags form a resource group. Resource group can be regarded as a management unit of data storage and calculation. Below we use a specific example to introduce the use of resource groups. - -1. Set labels for BE nodes - - Assume that the current Doris cluster has 6 BE nodes. They are host[1-6] respectively. In the initial situation, all nodes belong to a default resource group (Default). - - We can use the following command to divide these 6 nodes into 3 resource groups: group_a, group_b, group_c: - - ```sql - alter system modify backend "host1:9050" set ("tag.location" = "group_a"); - alter system modify backend "host2:9050" set ("tag.location" = "group_a"); - alter system modify backend "host3:9050" set ("tag.location" = "group_b"); - alter system modify backend "host4:9050" set ("tag.location" = "group_b"); - alter system modify backend "host5:9050" set ("tag.location" = "group_c"); - alter system modify backend "host6:9050" set ("tag.location" = "group_c"); - ``` - - Here we combine `host[1-2]` to form a resource group `group_a`, `host[3-4]` to form a resource group `group_b`, and `host[5-6]` to form a resource group `group_c`. - - > Note: One BE only supports setting one Tag. - -2. Distribution of data according to resource groups - - After the resource group is divided. We can distribute different copies of user data in different resource groups. Assume a user table UserTable. 
We want to store a copy in each of the three resource groups, which can be achieved by the following table creation statement: - - ```sql - create table UserTable - (k1 int, k2 int) - distributed by hash(k1) buckets 1 - properties( - "replication_allocation" - = - "tag.location.group_a:1, tag.location.group_b:1, tag.location.group_c:1" - ) - ``` - - In this way, the data in the UserTable table will be stored in the form of 3 copies in the nodes where the resource groups group_a, group_b, and group_c are located. - - The following figure shows the current node division and data distribution: - - ``` - ┌────────────────────────────────────────────────────┐ - │ │ - │ ┌──────────────────┐ ┌──────────────────┐ │ - │ │ host1 │ │ host2 │ │ - │ │ ┌─────────────┐ │ │ │ │ - │ group_a │ │ replica1 │ │ │ │ │ - │ │ └─────────────┘ │ │ │ │ - │ │ │ │ │ │ - │ └──────────────────┘ └──────────────────┘ │ - │ │ - ├────────────────────────────────────────────────────┤ - ├────────────────────────────────────────────────────┤ - │ │ - │ ┌──────────────────┐ ┌──────────────────┐ │ - │ │ host3 │ │ host4 │ │ - │ │ │ │ ┌─────────────┐ │ │ - │ group_b │ │ │ │ replica2 │ │ │ - │ │ │ │ └─────────────┘ │ │ - │ │ │ │ │ │ - │ └──────────────────┘ └──────────────────┘ │ - │ │ - ├────────────────────────────────────────────────────┤ - ├────────────────────────────────────────────────────┤ - │ │ - │ ┌──────────────────┐ ┌──────────────────┐ │ - │ │ host5 │ │ host6 │ │ - │ │ │ │ ┌─────────────┐ │ │ - │ group_c │ │ │ │ replica3 │ │ │ - │ │ │ │ └─────────────┘ │ │ - │ │ │ │ │ │ - │ └──────────────────┘ └──────────────────┘ │ - │ │ - └────────────────────────────────────────────────────┘ - ``` - -3. Use different resource groups for data query - - After the execution of the first two steps is completed, we can limit a user's query by setting the user's resource usage permissions, and can only use the nodes in the specified resource group to execute. - - For example, we can use the following statement to restrict user1 to only use nodes in the `group_a` resource group for data query, user2 can only use the `group_b` resource group, and user3 can use 3 resource groups at the same time: - - ```sql - set property for'user1''resource_tags.location' = 'group_a'; - set property for'user2''resource_tags.location' = 'group_b'; - set property for'user3''resource_tags.location' = 'group_a, group_b, group_c'; - ``` - - After the setting is complete, when user1 initiates a query on the UserTable table, it will only access the data copy on the nodes in the `group_a` resource group, and the query will only use the node computing resources in the `group_a` resource group. The query of user3 can use copies and computing resources in any resource group. - - In this way, we have achieved physical resource isolation for different user queries by dividing nodes and restricting user resource usage. Furthermore, we can create different users for different business departments and restrict each user from using different resource groups. In order to avoid the use of resource interference between different business parts. For example, there is a business table in the cluster that needs to be shared by all 9 business departments, but it is hoped that resource preemption between different departments can be avoided as much as possible. Then we can create 3 copies of this table and store them in 3 resource groups. Next, we create 9 users for 9 business departments, and limit the use of one resource group for every 3 users. 
In this way, the degree of competition for resources is reduced from 9 to 3. - - On the other hand, resource groups can also be used to isolate online and offline tasks. For example, we can divide nodes into two resource groups, Online and Offline. The table data is still stored with 3 replicas, of which 2 replicas are stored in the Online resource group and 1 replica is stored in the Offline resource group. The Online resource group is mainly used for online data services with high concurrency and low latency, while large queries or offline ETL operations can be executed using nodes in the Offline resource group. In this way, a unified cluster can provide online and offline services at the same time. - -## Single query resource limit - -The resource group method mentioned earlier is resource isolation and restriction at the node level. Within a resource group, resource preemption problems may still occur. For example, as mentioned above, the three business departments arranged in the same resource group compete less for resources, but their queries may still affect each other. - -Therefore, in addition to the resource group solution, Doris also provides a single-query resource restriction function. - -At present, Doris's resource restrictions on single queries are mainly divided into two aspects: CPU and memory restrictions. - -1. Memory Limitation - - Doris can limit the maximum memory overhead that a query is allowed to use, to ensure that the memory resources of the cluster will not be fully occupied by a single query. We can set the memory limit in the following ways: - - ``` - // Set the session variable exec_mem_limit. Then all subsequent queries in the session (within the connection) use this memory limit. - set exec_mem_limit=1G; - // Set the global variable exec_mem_limit. Then all subsequent queries of all new sessions (new connections) use this memory limit. - set global exec_mem_limit=1G; - // Set the variable exec_mem_limit in SQL. Then the variable only affects this SQL. - select /*+ SET_VAR(exec_mem_limit=1G) */ id, name from tbl where xxx; - ``` - - Because Doris' query engine is based on a full-memory MPP query framework, a query will be terminated when its memory usage exceeds the limit. If a query cannot run under a reasonable memory limit, it should be addressed through SQL optimization or cluster expansion. - -2. CPU limitations - - Users can limit the CPU resources of a query in the following ways: - - ``` - // Set the session variable cpu_resource_limit. Then all queries in the session (within the connection) will use this CPU limit. - set cpu_resource_limit = 2; - // Set the user's attribute cpu_resource_limit, then all queries of this user will use this CPU limit. The priority of this attribute is higher than the session variable cpu_resource_limit. - set property for 'user1' 'cpu_resource_limit' = '3'; - ``` - - The value of `cpu_resource_limit` is a relative value. The larger the value, the more CPU resources can be used. However, the upper limit of the CPU that a query can use also depends on the number of partitions and buckets of the table. In principle, the maximum CPU usage of a query is positively related to the number of tablets involved in the query. In extreme cases, if a query involves only one tablet, even if `cpu_resource_limit` is set to a large value, only 1 CPU resource can be used. A combined, hedged sketch of these per-user limits is shown below.
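As an illustration only — the user names and values below are hypothetical, and `exec_mem_limit` is assumed to be settable as a user property in the same way as `cpu_resource_limit` (see the user property documentation of your version) — the two kinds of limits can be combined per user, for example giving an offline ETL account more memory but fewer CPU slots than an online serving account:

```
-- Hypothetical users; exec_mem_limit values are assumed to be in bytes.
set property for 'etl_user' 'exec_mem_limit' = '8589934592';      -- 8 GB per query, fewer CPU slots
set property for 'etl_user' 'cpu_resource_limit' = '2';
set property for 'online_user' 'exec_mem_limit' = '2147483648';   -- 2 GB per query, more CPU slots
set property for 'online_user' 'cpu_resource_limit' = '8';
```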
- -Through memory and CPU resource limits, we can divide resources for user queries at a finer granularity within a resource group. For example, we can let offline tasks with low timeliness requirements but a large amount of computation use fewer CPU resources and more memory, while latency-sensitive online tasks use more CPU resources and a reasonable amount of memory. - -## Best practices and forward compatibility - -Tag division and CPU limitation are new features in version 0.15. To ensure a smooth upgrade from older versions, Doris provides the following forward compatibility: - -1. Each BE node will have a default Tag: `"tag.location": "default"`. -2. A BE node added through the `alter system add backend` statement will also be given the Tag `"tag.location": "default"` by default. -3. The replica distribution of all tables is modified by default to: `"tag.location.default:xx"`. xx is the original number of replicas. -4. Users can still specify the number of replicas in the table creation statement with `"replication_num" = "xx"`; this attribute will be automatically converted to `"tag.location.default:xx"`. This ensures that existing table creation statements do not need to be modified. -5. By default, the memory limit for a single query is 2GB per node, and CPU resources are unlimited, which is consistent with the original behavior. In addition, the user's `resource_tags.location` attribute is empty, that is, by default the user can access BEs of any Tag, which is also consistent with the original behavior. - -Here we give an example of the steps to start using the resource division function after upgrading an existing cluster to version 0.15: - -1. Turn off data repair and balance logic - - After the upgrade, the default Tag of a BE is `"tag.location": "default"`, and the default replica distribution of a table is `"tag.location.default:xx"`. So if you directly modify the Tag of a BE, the system will automatically detect the change in replica distribution and start data redistribution, which may occupy some system resources. Therefore, we can turn off the data repair and balance logic before modifying Tags, to make sure that no replicas are redistributed while we are planning resources. - - ``` - ADMIN SET FRONTEND CONFIG ("disable_balance" = "true"); - ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "true"); - ``` - -2. Set Tags and table replica distribution - - Next, you can use the `alter system modify backend` statement to set BE Tags, and the `alter table` statement to modify the replica distribution strategy of a table. Examples are as follows: - - ``` - alter system modify backend "host1:9050, 1212:9050" set ("tag.location" = "group_a"); - alter table my_table modify partition p1 set ("replication_allocation" = "tag.location.group_a:2"); - ``` - -3. Turn on data repair and balance logic - - After the Tags and replica distribution are set, we can turn the data repair and balance logic back on to trigger data redistribution. - - ``` - ADMIN SET FRONTEND CONFIG ("disable_balance" = "false"); - ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "false"); - ``` - - This process will continue for a period of time depending on the amount of data involved, and it will cause some colocation tables to temporarily fail to satisfy colocation (because replicas are being migrated). You can view the progress with `show proc "/cluster_balance/"`. You can also judge the progress by the number of `UnhealthyTabletNum` in `show proc "/statistic"`.
When `UnhealthyTabletNum` drops to 0, it means that the data redistribution is completed. . - -4. Set the user's resource label permissions. - - After the data is redistributed. We can start to set the user's resource label permissions. Because by default, the user's `resource_tags.location` attribute is empty, that is, the BE of any tag can be accessed. Therefore, in the previous steps, the normal query of existing users will not be affected. When the `resource_tags.location` property is not empty, the user will be restricted from accessing the BE of the specified Tag. - -Through the above 4 steps, we can smoothly use the resource division function after the original cluster is upgraded. diff --git a/docs/en/administrator-guide/operation/be-olap-error-code.md b/docs/en/administrator-guide/operation/be-olap-error-code.md deleted file mode 100644 index 5b4514cf76..0000000000 --- a/docs/en/administrator-guide/operation/be-olap-error-code.md +++ /dev/null @@ -1,256 +0,0 @@ ---- -{ - "title": "Description of the return value of the OLAP function on the BE side", - "language": "en" -} - ---- - - - -# Description of the return value of the OLAP function on the BE side - - - -| Return value name | Return value | Return value description | -| ------------------------------------------------ | ------ | ------------------------------------------- ----------------- | -| OLAP_SUCCESS | 0 | Success | -| OLAP_ERR_OTHER_ERROR | -1 | Other errors | -| OLAP_REQUEST_FAILED | -2 | Request failed | -| System error codes, such as file system memory and other system call failures | | | -| OLAP_ERR_OS_ERROR | -100 | Operating system error | -| OLAP_ERR_DIR_NOT_EXIST | -101 | Directory does not exist error | -| OLAP_ERR_FILE_NOT_EXIST | -102 | File does not exist error | -| OLAP_ERR_CREATE_FILE_ERROR | -103 | Error creating file | -| OLAP_ERR_MALLOC_ERROR | -104 | Memory allocation error | -| OLAP_ERR_STL_ERROR | -105 | Standard template library error | -| OLAP_ERR_IO_ERROR | -106 | IO error | -| OLAP_ERR_MUTEX_ERROR | -107 | Mutex error | -| OLAP_ERR_PTHREAD_ERROR | -108 | POSIX thread error | -| OLAP_ERR_NETWORK_ERROR | -109 | Network abnormal error | -| OLAP_ERR_UB_FUNC_ERROR | -110 | | -| OLAP_ERR_COMPRESS_ERROR | -111 | Data compression error | -| OLAP_ERR_DECOMPRESS_ERROR | -112 | Data decompression error | -| OLAP_ERR_UNKNOWN_COMPRESSION_TYPE | -113 | Unknown data compression type | -| OLAP_ERR_MMAP_ERROR | -114 | Memory mapped file error | -| OLAP_ERR_RWLOCK_ERROR | -115 | Read-write lock error | -| OLAP_ERR_READ_UNENOUGH | -116 | Read memory is not enough exception | -| OLAP_ERR_CANNOT_CREATE_DIR | -117 | Cannot create directory exception | -| OLAP_ERR_UB_NETWORK_ERROR | -118 | Network exception | -| OLAP_ERR_FILE_FORMAT_ERROR | -119 | File format abnormal | -| OLAP_ERR_EVAL_CONJUNCTS_ERROR | -120 | | -| OLAP_ERR_COPY_FILE_ERROR | -121 | Copy file error | -| OLAP_ERR_FILE_ALREADY_EXIST | -122 | File already exists error | -| General error codes | | | -| OLAP_ERR_NOT_INITED | -200 | Cannot initialize exception | -| OLAP_ERR_FUNC_NOT_IMPLEMENTED | -201 | Function cannot be executed exception | -| OLAP_ERR_CALL_SEQUENCE_ERROR | -202 | Call SEQUENCE exception | -| OLAP_ERR_INPUT_PARAMETER_ERROR | -203 | Input parameter error | -| OLAP_ERR_BUFFER_OVERFLOW | -204 | Memory buffer overflow error | -| OLAP_ERR_CONFIG_ERROR | -205 | Configuration error | -| OLAP_ERR_INIT_FAILED | -206 | Initialization failed | -| OLAP_ERR_INVALID_SCHEMA | -207 | Invalid Schema | -| OLAP_ERR_CHECKSUM_ERROR | -208 | Check value 
error | -| OLAP_ERR_SIGNATURE_ERROR | -209 | Signature error | -| OLAP_ERR_CATCH_EXCEPTION | -210 | Exception caught | -| OLAP_ERR_PARSE_PROTOBUF_ERROR | -211 | Error parsing Protobuf| -| OLAP_ERR_INVALID_ROOT_PATH | -222 | Invalid root directory | -| OLAP_ERR_NO_AVAILABLE_ROOT_PATH | -223 | No valid root directory | -| OLAP_ERR_CHECK_LINES_ERROR | -224 | Check the number of lines error | -| OLAP_ERR_INVALID_CLUSTER_INFO | -225 | Invalid Cluster Information | -| OLAP_ERR_TRANSACTION_NOT_EXIST | -226 | Transaction does not exist | -| OLAP_ERR_DISK_FAILURE | -227 | Disk error | -| OLAP_ERR_TRANSACTION_ALREADY_COMMITTED | -228 | Transaction submitted | -| OLAP_ERR_TRANSACTION_ALREADY_VISIBLE | -229 | Transaction visible | -| OLAP_ERR_VERSION_ALREADY_MERGED | -230 | Version has been merged | -| OLAP_ERR_LZO_DISABLED | -231 | LZO is disabled | -| OLAP_ERR_DISK_REACH_CAPACITY_LIMIT | -232 | Disk reached capacity limit | -| OLAP_ERR_TOO_MANY_TRANSACTIONS | -233 | Too many transaction backlogs are not completed | -| OLAP_ERR_INVALID_SNAPSHOT_VERSION | -234 | Invalid snapshot version | -| OLAP_ERR_TOO_MANY_VERSION | -235 | The tablet data version exceeds the maximum limit (default 500) | -| OLAP_ERR_NOT_INITIALIZED | -236 | Cannot initialize | -| OLAP_ERR_ALREADY_CANCELLED | -237 | Has been cancelled | -| OLAP_ERR_TOO_MANY_SEGMENTS | -238 | usually occurs when the amount of imported data in the same batch is too large, resulting in too many segment files for a tablet | -| Command execution exception code | | | -| OLAP_ERR_CE_CMD_PARAMS_ERROR | -300 | Command parameter error | -| OLAP_ERR_CE_BUFFER_TOO_SMALL | -301 | Too many small files in the buffer | -| OLAP_ERR_CE_CMD_NOT_VALID | -302 | Invalid command | -| OLAP_ERR_CE_LOAD_TABLE_ERROR | -303 | Error loading data table | -| OLAP_ERR_CE_NOT_FINISHED | -304 | The command was not executed successfully | -| OLAP_ERR_CE_TABLET_ID_EXIST | -305 | Tablet Id does not exist error | -| OLAP_ERR_CE_TRY_CE_LOCK_ERROR | -306 | Attempt to obtain execution command lock error | -| Tablet error exception code | | | -| OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR | -400 | Tablet copy version error | -| OLAP_ERR_TABLE_VERSION_INDEX_MISMATCH_ERROR | -401 | teblet version index mismatch exception | -| OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR | -402 | The initial version of the tablet is not checked here, because if the BE is restarted during a schema-change of a tablet, we may encounter an empty tablet exception | -| OLAP_ERR_TABLE_INDEX_FIND_ERROR | -403 | Unable to get the position of the first block or failure to find the last block of the block will cause this exception | -| OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR | -404 | This exception is triggered when the tablet cannot be loaded | -| OLAP_ERR_TABLE_CREATE_META_ERROR | -405 | Unable to create tablet (change schema), base tablet does not exist, this exception will be triggered | -| OLAP_ERR_TABLE_ALREADY_DELETED_ERROR | -406 | The tablet has been deleted | -| Storage Engine Error Code | | | -| OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE | -500 | Add the same tablet twice, add the tablet to the same data directory twice, the new tablet is empty, and the old tablet exists. 
Will trigger this exception | -| OLAP_ERR_ENGINE_DROP_NOEXISTS_TABLE | -501 | Delete non-existent table | -| OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR | -502 | Failed to load tablet_meta, segment group meta with invalid cumulative rowset, will cause this exception | -| OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR | -503 | Duplicate table insert | -| OLAP_ERR_DELETE_VERSION_ERROR | -504 | Delete version error | -| OLAP_ERR_GC_SCAN_PATH_ERROR | -505 | GC scan path error | -| OLAP_ERR_ENGINE_INSERT_OLD_TABLET | -506 | When BE is restarting and older tablets have been added to the garbage collection queue but have not yet been deleted. In this case, since data_dirs are loaded in parallel, tablets loaded later may be loaded later than before The tablet is old, this should not be confirmed as a failure, so return to change the code at this time | -| Fetch Handler error code | | | -| OLAP_ERR_FETCH_OTHER_ERROR | -600 | FetchHandler other errors | -| OLAP_ERR_FETCH_TABLE_NOT_EXIST | -601 | FetchHandler table does not exist | -| OLAP_ERR_FETCH_VERSION_ERROR | -602 | FetchHandler version error | -| OLAP_ERR_FETCH_SCHEMA_ERROR | -603 | FetchHandler Schema error | -| OLAP_ERR_FETCH_COMPRESSION_ERROR | -604 | FetchHandler compression error | -| OLAP_ERR_FETCH_CONTEXT_NOT_EXIST | -605 | FetchHandler context does not exist | -| OLAP_ERR_FETCH_GET_READER_PARAMS_ERR | -606 | FetchHandler GET read parameter error | -| OLAP_ERR_FETCH_SAVE_SESSION_ERR | -607 | FetchHandler save session error | -| OLAP_ERR_FETCH_MEMORY_EXCEEDED | -608 | FetchHandler memory exceeded exception | -| Read exception error code | | | -| OLAP_ERR_READER_IS_UNINITIALIZED | -700 | Read cannot be initialized | -| OLAP_ERR_READER_GET_ITERATOR_ERROR | -701 | Get read iterator error | -| OLAP_ERR_CAPTURE_ROWSET_READER_ERROR | -702 | Current Rowset read error | -| OLAP_ERR_READER_READING_ERROR | -703 | Failed to initialize column data, the column data of cumulative rowset is invalid, this exception code will be returned | -| OLAP_ERR_READER_INITIALIZE_ERROR | -704 | Read initialization failed | -| BaseCompaction exception code information | | | -| OLAP_ERR_BE_VERSION_NOT_MATCH | -800 | BE Compaction version mismatch error | -| OLAP_ERR_BE_REPLACE_VERSIONS_ERROR | -801 | BE Compaction replacement version error | -| OLAP_ERR_BE_MERGE_ERROR | -802 | BE Compaction merge error | -| OLAP_ERR_CAPTURE_ROWSET_ERROR | -804 | Cannot find the version corresponding to Rowset | -| OLAP_ERR_BE_SAVE_HEADER_ERROR | -805 | BE Compaction save header error | -| OLAP_ERR_BE_INIT_OLAP_DATA | -806 | BE Compaction initialized OLAP data error | -| OLAP_ERR_BE_TRY_OBTAIN_VERSION_LOCKS | -807 | BE Compaction trying to obtain version lock error | -| OLAP_ERR_BE_NO_SUITABLE_VERSION | -808 | BE Compaction does not have a suitable version | -| OLAP_ERR_BE_TRY_BE_LOCK_ERROR | -809 | The other base compaction is running, and the attempt to acquire the lock failed | -| OLAP_ERR_BE_INVALID_NEED_MERGED_VERSIONS | -810 | Invalid Merge version | -| OLAP_ERR_BE_ERROR_DELETE_ACTION | -811 | BE performing delete operation error | -| OLAP_ERR_BE_SEGMENTS_OVERLAPPING | -812 | Rowset exception with overlapping cumulative points | -| OLAP_ERR_BE_CLONE_OCCURRED | -813 | Cloning tasks may occur after the compression task is submitted to the thread pool, and the set of rows selected for compression may change. In this case, the current compression task should not be performed. 
Return this code | -| PUSH exception code | | | -| OLAP_ERR_PUSH_INIT_ERROR | -900 | Unable to initialize reader, unable to create table descriptor, unable to initialize memory tracker, unsupported file format type, unable to open scanner, unable to obtain tuple descriptor, failed to allocate memory for tuple, Will return this code | -| OLAP_ERR_PUSH_DELTA_FILE_EOF | -901 | | -| OLAP_ERR_PUSH_VERSION_INCORRECT | -902 | PUSH version is incorrect | -| OLAP_ERR_PUSH_SCHEMA_MISMATCH | -903 | PUSH Schema does not match | -| OLAP_ERR_PUSH_CHECKSUM_ERROR | -904 | PUSH check value error | -| OLAP_ERR_PUSH_ACQUIRE_DATASOURCE_ERROR | -905 | PUSH get data source error | -| OLAP_ERR_PUSH_CREAT_CUMULATIVE_ERROR | -906 | PUSH Create CUMULATIVE error code | -| OLAP_ERR_PUSH_BUILD_DELTA_ERROR | -907 | The pushed incremental file has an incorrect check code | -| OLAP_ERR_PUSH_VERSION_ALREADY_EXIST | -908 | PUSH version already exists | -| OLAP_ERR_PUSH_TABLE_NOT_EXIST | -909 | PUSH table does not exist | -| OLAP_ERR_PUSH_INPUT_DATA_ERROR | -910 | PUSH data is invalid, it may be length, data type and other issues | -| OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST | -911 | When submitting the transaction to the engine, it is found that Rowset exists, but the Rowset ID is different | -| OLAP_ERR_PUSH_BATCH_PROCESS_REMOVED | -912 | Deleted the push batch process | -| OLAP_ERR_PUSH_COMMIT_ROWSET | -913 | PUSH Commit Rowset | -| OLAP_ERR_PUSH_ROWSET_NOT_FOUND | -914 | PUSH Rowset not found | -| SegmentGroup exception code | | | -| OLAP_ERR_INDEX_LOAD_ERROR | -1000 | Load index error | -| OLAP_ERR_INDEX_EOF | -1001 | | -| OLAP_ERR_INDEX_CHECKSUM_ERROR | -1002 | Checksum verification error, segment error loading index. | -| OLAP_ERR_INDEX_DELTA_PRUNING | -1003 | Index incremental pruning | -| OLAPData exception code information | | | -| OLAP_ERR_DATA_ROW_BLOCK_ERROR | -1100 | Data row Block block error | -| OLAP_ERR_DATA_FILE_TYPE_ERROR | -1101 | Data file type error | -| OLAP_ERR_DATA_EOF | -1102 | | -| OLAP data write error code | | | -| OLAP_ERR_WRITER_INDEX_WRITE_ERROR | -1200 | Index write error | -| OLAP_ERR_WRITER_DATA_WRITE_ERROR | -1201 | Data writing error | -| OLAP_ERR_WRITER_ROW_BLOCK_ERROR | -1202 | Row Block block write error | -| OLAP_ERR_WRITER_SEGMENT_NOT_FINALIZED | -1203 | Before adding a new segment, the previous segment was not completed | -| RowBlock error code | | | -| OLAP_ERR_ROWBLOCK_DECOMPRESS_ERROR | -1300 | Rowblock decompression error | -| OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION | -1301 | Failed to obtain Block Entry | -| OLAP_ERR_ROWBLOCK_READ_INFO_ERROR | -1302 | Error reading Rowblock information | -| Tablet metadata error | | | -| OLAP_ERR_HEADER_ADD_VERSION | -1400 | Tablet metadata increase version | -| OLAP_ERR_HEADER_DELETE_VERSION | -1401 | Tablet metadata deletion version | -| OLAP_ERR_HEADER_ADD_PENDING_DELTA | -1402 | Tablet metadata add pending increment | -| OLAP_ERR_HEADER_ADD_INCREMENTAL_VERSION | -1403 | Tablet metadata addition self-increment version | -| OLAP_ERR_HEADER_INVALID_FLAG | -1404 | Invalid tablet metadata flag | -| OLAP_ERR_HEADER_PUT | -1405 | tablet metadata PUT operation | -| OLAP_ERR_HEADER_DELETE | -1406 | tablet metadata DELETE operation | -| OLAP_ERR_HEADER_GET | -1407 | tablet metadata GET operation | -| OLAP_ERR_HEADER_LOAD_INVALID_KEY | -1408 | Tablet metadata loading invalid Key | -| OLAP_ERR_HEADER_FLAG_PUT | -1409 | | -| OLAP_ERR_HEADER_LOAD_JSON_HEADER | -1410 | tablet metadata loading JSON Header | -| OLAP_ERR_HEADER_INIT_FAILED | -1411 | Tablet 
metadata header initialization failed | -| OLAP_ERR_HEADER_PB_PARSE_FAILED | -1412 | Tablet metadata Protobuf parsing failed | -| OLAP_ERR_HEADER_HAS_PENDING_DATA | -1413 | Tablet metadata pending data | -| TabletSchema exception code information | | | -| OLAP_ERR_SCHEMA_SCHEMA_INVALID | -1500 | Invalid Tablet Schema | -| OLAP_ERR_SCHEMA_SCHEMA_FIELD_INVALID | -1501 | Tablet Schema field is invalid | -| SchemaHandler exception code information | | | -| OLAP_ERR_ALTER_MULTI_TABLE_ERR | -1600 | ALTER multi-table error | -| OLAP_ERR_ALTER_DELTA_DOES_NOT_EXISTS | -1601 | Failed to get all data sources, Tablet has no version | -| OLAP_ERR_ALTER_STATUS_ERR | -1602 | Failed to check the row number, internal sorting failed, row block sorting failed, these will return this code | -| OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED | -1603 | The previous schema change is not completed | -| OLAP_ERR_SCHEMA_CHANGE_INFO_INVALID | -1604 | Schema change information is invalid | -| OLAP_ERR_QUERY_SPLIT_KEY_ERR | -1605 | Query Split key error | -| OLAP_ERR_DATA_QUALITY_ERR | -1606 | Errors caused by data quality issues during schema changes/materialized views | -| Column File error code | | | -| OLAP_ERR_COLUMN_DATA_LOAD_BLOCK | -1700 | Error loading column data block | -| OLAP_ERR_COLUMN_DATA_RECORD_INDEX | -1701 | Load data record index error | -| OLAP_ERR_COLUMN_DATA_MAKE_FILE_HEADER | -1702 | | -| OLAP_ERR_COLUMN_DATA_READ_VAR_INT | -1703 | Cannot read column data from Stream | -| OLAP_ERR_COLUMN_DATA_PATCH_LIST_NUM | -1704 | | -| OLAP_ERR_COLUMN_STREAM_EOF | -1705 | If the data stream ends, return this code | -| OLAP_ERR_COLUMN_READ_STREAM | -1706 | The block size is greater than the buffer size, the remaining compressed size is less than the Stream header size, and the read stream fails. This exception will be thrown in these cases | -| OLAP_ERR_COLUMN_STREAM_NOT_EXIST | -1707 | Stream is empty, does not exist, the data stream is not found, etc. The exception code is returned | -| OLAP_ERR_COLUMN_VALUE_NULL | -1708 | Column value is empty exception | -| OLAP_ERR_COLUMN_SEEK_ERROR | -1709 | If you add a column through a schema change, the column index may exist due to the schema change, and this exception code is returned | -| DeleteHandler error code | | | -| OLAP_ERR_DELETE_INVALID_CONDITION | -1900 | Invalid delete condition | -| OLAP_ERR_DELETE_UPDATE_HEADER_FAILED | -1901 | Delete update Header error | -| OLAP_ERR_DELETE_SAVE_HEADER_FAILED | -1902 | Delete save header error | -| OLAP_ERR_DELETE_INVALID_PARAMETERS | -1903 | Invalid delete parameter | -| OLAP_ERR_DELETE_INVALID_VERSION | -1904 | Invalid delete version | -| Cumulative Handler error code | | | -| OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS | -2000 | Cumulative does not have a suitable version | -| OLAP_ERR_CUMULATIVE_REPEAT_INIT | -2001 | Cumulative Repeat initialization error | -| OLAP_ERR_CUMULATIVE_INVALID_PARAMETERS | -2002 | Invalid Cumulative parameter | -| OLAP_ERR_CUMULATIVE_FAILED_ACQUIRE_DATA_SOURCE | -2003 | Cumulative failed to obtain data source | -| OLAP_ERR_CUMULATIVE_INVALID_NEED_MERGED_VERSIONS | -2004 | Cumulative does not have a valid version that needs to be merged | -| OLAP_ERR_CUMULATIVE_ERROR_DELETE_ACTION | -2005 | Cumulative delete operation error | -| OLAP_ERR_CUMULATIVE_MISS_VERSION | -2006 | rowsets missing version | -| OLAP_ERR_CUMULATIVE_CLONE_OCCURRED | -2007 | Cloning tasks may occur after the compression task is submitted to the thread pool, and the set of rows selected for compression may change. 
In this case, the current compression task should not be performed. Otherwise it will trigger a change exception | -| OLAPMeta exception code | | | -| OLAP_ERR_META_INVALID_ARGUMENT | -3000 | Invalid metadata parameter | -| OLAP_ERR_META_OPEN_DB | -3001 | Open DB metadata error | -| OLAP_ERR_META_KEY_NOT_FOUND | -3002 | Metadata key not found | -| OLAP_ERR_META_GET | -3003 | GET metadata error | -| OLAP_ERR_META_PUT | -3004 | PUT metadata error | -| OLAP_ERR_META_ITERATOR | -3005 | Metadata iterator error | -| OLAP_ERR_META_DELETE | -3006 | Delete metadata error | -| OLAP_ERR_META_ALREADY_EXIST | -3007 | Metadata already has an error | -| Rowset error code | | | -| OLAP_ERR_ROWSET_WRITER_INIT | -3100 | Rowset write initialization error | -| OLAP_ERR_ROWSET_SAVE_FAILED | -3101 | Rowset save failed | -| OLAP_ERR_ROWSET_GENERATE_ID_FAILED | -3102 | Rowset failed to generate ID | -| OLAP_ERR_ROWSET_DELETE_FILE_FAILED | -3103 | Rowset failed to delete file | -| OLAP_ERR_ROWSET_BUILDER_INIT | -3104 | Rowset initialization failed to build | -| OLAP_ERR_ROWSET_TYPE_NOT_FOUND | -3105 | Rowset type not found | -| OLAP_ERR_ROWSET_ALREADY_EXIST | -3106 | Rowset already exists | -| OLAP_ERR_ROWSET_CREATE_READER | -3107 | Rowset failed to create read object | -| OLAP_ERR_ROWSET_INVALID | -3108 | Rowset is invalid | -| OLAP_ERR_ROWSET_LOAD_FAILED | -3109 | Rowset load failed | -| OLAP_ERR_ROWSET_READER_INIT | -3110 | Rowset read object initialization failed | -| OLAP_ERR_ROWSET_READ_FAILED | -3111 | Rowset read failure | -| OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION | -3112 | Rowset invalid transaction state | - - - diff --git a/docs/en/administrator-guide/operation/disk-capacity.md b/docs/en/administrator-guide/operation/disk-capacity.md deleted file mode 100644 index 77473cf775..0000000000 --- a/docs/en/administrator-guide/operation/disk-capacity.md +++ /dev/null @@ -1,169 +0,0 @@ ---- -{ - "title": "Disk Capacity Management", - "language": "en" -} ---- - - - -# Disk Capacity Management - -This document mainly introduces system parameters and processing strategies related to disk storage capacity. - -If Doris' data disk capacity is not controlled, the process will hang because the disk is full. Therefore, we monitor the disk usage and remaining capacity, and control various operations in the Doris system by setting different warning levels, and try to avoid the situation where the disk is full. - -## Glossary - -* FE: Doris Frontend Node. Responsible for metadata management and request access. -* BE: Doris Backend Node. Responsible for query execution and data storage. -* Data Dir: Data directory, each data directory specified in the `storage_root_path` of the BE configuration file `be.conf`. Usually a data directory corresponds to a disk, so the following **disk** also refers to a data directory. - -## Basic Principles - -BE will report disk usage to FE on a regular basis (every minute). FE records these statistical values and restricts various operation requests based on these statistical values. - -Two thresholds, **High Watermark** and **Flood Stage**, are set in FE. Flood Stage is higher than High Watermark. When the disk usage is higher than High Watermark, Doris will restrict the execution of certain operations (such as replica balancing, etc.). If it is higher than Flood Stage, certain operations (such as load data) will be prohibited. - -At the same time, a **Flood Stage** is also set on the BE. 
This is because FE may not be able to detect the disk usage on the BE in a timely manner, and cannot control certain BE operations (such as Compaction). The Flood Stage on the BE therefore lets the BE actively refuse or stop certain operations to protect itself. - -## FE Parameter - -**High Watermark:** - -``` -storage_high_watermark_usage_percent: default value is 85 (85%). -storage_min_left_capacity_bytes: default value is 2GB. -``` - -When disk usage is **higher than** `storage_high_watermark_usage_percent`, **or** disk free capacity is **less than** `storage_min_left_capacity_bytes`, the disk will no longer be used as the destination path for the following operations: - -* Tablet Balance -* Colocation Relocation -* Decommission - -**Flood Stage:** - -``` -storage_flood_stage_usage_percent: default value is 95 (95%). -storage_flood_stage_left_capacity_bytes: default value is 1GB. -``` - -When disk usage is **higher than** `storage_flood_stage_usage_percent`, **or** disk free capacity is **less than** `storage_flood_stage_left_capacity_bytes`, the disk will no longer be used as the destination path for the following operations: - -* Tablet Balance -* Colocation Relocation -* Replica make up -* Restore -* Load/Insert - -## BE Parameter - -**Flood Stage:** - -``` -capacity_used_percent_flood_stage: default value is 95 (95%). -capacity_min_left_bytes_flood_stage: default value is 1GB. -``` - -When disk usage is **higher than** `capacity_used_percent_flood_stage`, **and** disk free capacity is **less than** `capacity_min_left_bytes_flood_stage`, the following operations on this disk will be prohibited: - -* Base/Cumulative Compaction -* Data load -* Clone Task (Usually occurs when the replica is repaired or balanced.) -* Push Task (Occurs during the Loading phase of Hadoop import, when files are downloaded.) -* Alter Task (Schema Change or Rollup Task.) -* Download Task (The Downloading phase of the recovery operation.) - -## Disk Capacity Release - -When the disk usage is higher than the High Watermark or even the Flood Stage, many operations will be prohibited. At this time, you can try to reduce the disk usage and restore the system in the following ways. - -* Delete table or partition - - By deleting tables or partitions, you can quickly reduce the disk space usage and restore the cluster. - **Note: Only the `DROP` operation can quickly reduce disk space usage; the `DELETE` operation cannot.** - - ``` - DROP TABLE tbl; - ALTER TABLE tbl DROP PARTITION p1; - ``` - -* BE expansion - - After backend expansion, data tablets will be automatically balanced to BE nodes with lower disk usage. Depending on the amount of data and the number of nodes, the expansion operation will bring the cluster to a balanced state in a few hours or days. - -* Modify the replica number of a table or partition - - You can reduce the number of replicas of a table or partition. For example, the default 3 replicas can be reduced to 2. Although this method reduces data reliability, it can quickly reduce the disk usage rate and restore the cluster to normal. - This method is usually only used for emergency recovery. After recovery, once the disk usage has been reduced by expanding the cluster or deleting data, please restore the number of replicas to 3. - The replica modification takes effect immediately, and the BEs will automatically and asynchronously delete the redundant replicas.
- - ``` - ALTER TABLE tbl MODIFY PARTITION p1 SET("replication_num" = "2"); - ``` - -* Delete unnecessary files - - When the BE has crashed because the disk is full and cannot be started (this phenomenon may occur due to untimely detection of FE or BE), you need to delete some temporary files in the data directory to ensure that the BE process can start. - Files in the following directories can be deleted directly: - - * log/: Log files in the log directory. - * snapshot/: Snapshot files in the snapshot directory. - * trash/ Trash files in the trash directory. - - **This operation will affect [Restore data from BE Recycle Bin](./tablet-restore-tool.md).** - - If the BE can still be started, you can use `ADMIN CLEAN TRASH ON(BackendHost:BackendHeartBeatPort);` to actively clean up temporary files. **all trash files** and expired snapshot files will be cleaned up, **This will affect the operation of restoring data from the trash bin**. - - - If you do not manually execute `ADMIN CLEAN TRASH`, the system will still automatically execute the cleanup within a few minutes to tens of minutes.There are two situations as follows: - * If the disk usage does not reach 90% of the **Flood Stage**, expired trash files and expired snapshot files will be cleaned up. At this time, some recent files will be retained without affecting the recovery of data. - * If the disk usage has reached 90% of the **Flood Stage**, **all trash files** and expired snapshot files will be cleaned up, **This will affect the operation of restoring data from the trash bin**. - - The time interval for automatic execution can be changed by `max_garbage_sweep_interval` and `max_garbage_sweep_interval` in the configuration items. - - When the recovery fails due to lack of trash files, the following results may be returned: - - ``` - {"status": "Fail","msg": "can find tablet path in trash"} - ``` - -* Delete data file (dangerous!!!) - - When none of the above operations can free up capacity, you need to delete data files to free up space. The data file is in the `data/` directory of the specified data directory. To delete a tablet, you must first ensure that at least one replica of the tablet is normal, otherwise **deleting the only replica will result in data loss**. - - Suppose we want to delete the tablet with id 12345: - - * Find the directory corresponding to Tablet, usually under `data/shard_id/tablet_id/`. like: - - ```data/0/12345/``` - - * Record the tablet id and schema hash. The schema hash is the name of the next-level directory of the previous step. The following is 352781111: - - ```data/0/12345/352781111``` - - * Delete the data directory: - - ```rm -rf data/0/12345/``` - - * Delete tablet metadata (refer to [Tablet metadata management tool](./tablet-meta-tool.md)) - - ```./lib/meta_tool --operation=delete_header --root_path=/path/to/root_path --tablet_id=12345 --schema_hash= 352781111``` \ No newline at end of file diff --git a/docs/en/administrator-guide/operation/metadata-operation.md b/docs/en/administrator-guide/operation/metadata-operation.md deleted file mode 100644 index df8023cef9..0000000000 --- a/docs/en/administrator-guide/operation/metadata-operation.md +++ /dev/null @@ -1,404 +0,0 @@ ---- -{ - "title": "Metadata Operations and Maintenance", - "language": "en" -} ---- - - - -# Metadata Operations and Maintenance - -This document focuses on how to manage Doris metadata in a real production environment. 
It includes the proposed deployment of FE nodes, some commonly used operational methods, and common error resolution methods. - -For the time being, read the [Doris metadata design document](../../internal/metadata-design_EN.md) to understand how Doris metadata works. - -## Important tips - -* Current metadata design is not backward compatible. That is, if the new version has a new metadata structure change (you can see whether there is a new VERSION in the `FeMetaVersion.java` file in the FE code), it is usually impossible to roll back to the old version after upgrading to the new version. Therefore, before upgrading FE, be sure to test metadata compatibility according to the operations in the [Upgrade Document](../../installing/upgrade_EN.md). - -## Metadata catalog structure - -Let's assume that the path of `meta_dir` specified in fe.conf is `path/to/palo-meta`. In a normal Doris cluster, the directory structure of metadata should be as follows: - -``` -/path/to/palo-meta/ - |-- bdb/ - | |-- 00000000.jdb - | |-- je.config.csv - | |-- je.info.0 - | |-- je.info.0.lck - | |-- je.lck - | `-- je.stat.csv - `-- image/ - |-- ROLE - |-- VERSION - `-- image.xxxx -``` - -1. bdb - - We use [bdbje](https://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html) as a distributed kV system to store metadata journal. This BDB directory is equivalent to the "data directory" of bdbje. - - The `.jdb` suffix is the data file of bdbje. These data files will increase with the increasing number of metadata journals. When Doris regularly completes the image, the old log is deleted. So normally, the total size of these data files varies from several MB to several GB (depending on how Doris is used, such as import frequency). When the total size of the data file is larger than 10GB, you may need to wonder whether the image failed or the historical journals that failed to distribute the image could not be deleted. - - ` je.info.0 ` is the running log of bdbje. The time in this log is UTC + 0 time zone. We may fix this in a later version. From this log, you can also see how some bdbje works. - -2. image directory - - The image directory is used to store metadata mirrors generated regularly by Doris. Usually, you will see a `image.xxxxx` mirror file. Where `xxxxx` is a number. This number indicates that the image contains all metadata journal before `xxxx`. And the generation time of this file (viewed through `ls -al`) is usually the generation time of the mirror. - - You may also see a `image.ckpt` file. This is a metadata mirror being generated. The `du -sh` command should show that the file size is increasing, indicating that the mirror content is being written to the file. When the mirror is written, it automatically renames itself to a new `image.xxxxx` and replaces the old image file. - - Only FE with a Master role will actively generate image files on a regular basis. After each generation, FE is pushed to other non-Master roles. When it is confirmed that all other FEs have received this image, Master FE deletes the metadata journal in bdbje. Therefore, if image generation fails or image push fails to other FEs, data in bdbje will accumulate. - - `ROLE` file records the type of FE (FOLLOWER or OBSERVER), which is a text file. - - `VERSION` file records the cluster ID of the Doris cluster and the token used to access authentication between nodes, which is also a text file. - - `ROLE` file and `VERSION` file may only exist at the same time, or they may not exist at the same time (e.g. 
at the first startup). - -## Basic operations - -### Start single node FE - -Single node FE is the most basic deployment mode. A complete Doris cluster requires at least one FE node. When there is only one FE node, the type of the node is Follower and the role is Master. - -1. First start-up - - 1. Suppose the path of `meta_dir` specified in fe.conf is `path/to/palo-meta`. - 2. Ensure that `path/to/palo-meta` already exists, that the permissions are correct and that the directory is empty. - 3. Start directly through `sh bin/start_fe.sh`. - 4. After booting, you should be able to see the following log in fe.log: - - * Palo FE starting... - * image does not exist: /path/to/palo-meta/image/image.0 - * transfer from INIT to UNKNOWN - * transfer from UNKNOWN to MASTER - * the very first time to open bdb, dbname is 1 - * start fencing, epoch number is 1 - * finish replay in xxx msec - * QE service start - * thrift server started - - The above logs are not necessarily strictly in this order, but they are basically similar. - - 5. The first start-up of a single-node FE usually does not encounter problems. If you haven't seen the above logs, generally speaking, you haven't followed the document steps carefully, please read the relevant wiki carefully. - -2. Restart - - 1. Stopped FE nodes can be restarted by using `sh bin/start_fe.sh`. - 2. After restarting, you should be able to see the following log in fe.log: - - * Palo FE starting... - * finished to get cluster id: xxxx, role: FOLLOWER and node name: xxxx - * If no image has been generated before reboot, you will see: - * image does not exist: /path/to/palo-meta/image/image.0 - - * If an image is generated before the restart, you will see: - * start load image from /path/to/palo-meta/image/image.xxx. is ckpt: false - * finished load image in xxx ms - - * transfer from INIT to UNKNOWN - * replayed journal id is xxxx, replay to journal id is yyyy - * transfer from UNKNOWN to MASTER - * finish replay in xxx msec - * master finish replay journal, can write now. - * begin to generate new image: image.xxxx - * start save image to /path/to/palo-meta/image/image.ckpt. is ckpt: true - * finished save image /path/to/palo-meta/image/image.ckpt in xxx ms. checksum is xxxx - * push image.xxx to other nodes. totally xx nodes, push successed xx nodes - * QE service start - * thrift server started - - The above logs are not necessarily strictly in this order, but they are basically similar. - -3. Common problems - - For the deployment of single-node FE, start-stop usually does not encounter any problems. If you have any questions, please refer to the relevant Wiki and check your operation steps carefully. - -### Add FE - -Adding FE processes is described in detail in the [Deployment and Upgrade Documents](../../installing/install-deploy.md#Adding%20FE%20nodes) and will not be repeated. Here are some points for attention, as well as common problems. - -1. Notes - - * Before adding a new FE, make sure that the current Master FE runs properly (connection is normal, JVM is normal, image generation is normal, bdbje data directory is too large, etc.) - * The first time you start a new FE, you must make sure that the `--helper` parameter is added to point to Master FE. There is no need to add `--helper` when restarting. (If `--helper` is specified, FE will directly ask the helper node for its role. If not, FE will try to obtain information from `ROLE` and `VERSION` files in the `palo-meta/image/` directory. 
- * The first time you start a new FE, you must make sure that the `meta_dir` of the FE is created, has correct permissions and is empty. - * There is no required order between starting the new FE and executing the `ALTER SYSTEM ADD FOLLOWER/OBSERVER` statement that registers it in the metadata. If the new FE is started before the statement is executed, `current node is not added to the group. Please add it first.` will appear in the new FE log. When the statement is executed, it enters the normal process. - * Make sure that the previous FE has been added successfully before adding the next one. - * Connect to the MASTER FE to execute the `ALTER SYSTEM ADD FOLLOWER/OBSERVER` statement. - -2. Common problems - - 1. this node is DETACHED - - When you first start an FE to be added, if the data in palo-meta/bdb on the Master FE is large, you may see the words `this node is DETACHED` in the log of the FE to be added. At this point, bdbje is copying data, and you can see that the `bdb/` directory of the FE to be added is growing. This process usually takes several minutes (depending on the amount of data in bdbje). Later, there may be some bdbje-related error stack information in fe.log. If `QE service start` and `thrift server start` are displayed in the final log, the start is usually successful. You can try to connect this FE via mysql-client. If these words do not appear, it may be a problem of bdbje replication log timeout. At this point, restarting the FE directly will usually solve the problem. - - 2. Failure to add due to various reasons - - * If an OBSERVER is added: because OBSERVER-type FE does not participate in the majority writing of metadata, it can theoretically be started and stopped at will. Therefore, if adding an OBSERVER fails, the OBSERVER FE process can be killed directly. After clearing the metadata directory of the OBSERVER, add the process again. - - * If a FOLLOWER is added: because FOLLOWERs participate in the majority writing of metadata, the new FOLLOWER may already have joined the bdbje electoral group. If there are only two FOLLOWER nodes (including MASTER), then stopping one FE may cause the other FE to exit as well, because it can no longer complete majority writes. At this point, we should first delete the newly added FOLLOWER node from the metadata through the `ALTER SYSTEM DROP FOLLOWER` command, then kill the FOLLOWER process, empty its metadata and re-add the process. - - -### Delete FE - -The corresponding type of FE can be deleted by the `ALTER SYSTEM DROP FOLLOWER/OBSERVER` command. The following points should be noted: - -* For OBSERVER-type FE, a direct DROP is enough, without risk. - -* For FOLLOWER-type FE, first make sure that there is an odd number of FOLLOWERs (three or more) before you start deleting. - - 1. If an FE of non-MASTER role is deleted, it is recommended to connect to the MASTER FE, execute the DROP command, and then kill the process. - 2. If you want to delete the MASTER FE, first confirm that there is an odd number of FOLLOWER FEs and that they are working properly. Then kill the MASTER FE process first. At this point, another FE will be elected MASTER. After confirming that the remaining FEs are working properly, connect to the new MASTER FE and execute the DROP command to delete the old MASTER FE. - -## Advanced Operations - -### Failure recovery - -For some reasons, FE may fail to start bdbje or fail to synchronize metadata between FEs. Phenomena include the inability to write metadata, the absence of a MASTER, and so on. At this point, we need to manually restore the FE.
The general principle of manual recovery of FE is to start a new MASTER through metadata in the current `meta_dir`, and then add other FEs one by one. Please follow the following steps strictly: - -1. First, stop all FE processes and all business access. Make sure that during metadata recovery, external access will not lead to other unexpected problems. - -2. Identify which FE node's metadata is up-to-date: - - * First of all, **be sure to back up all FE's `meta_dir` directories first.** - * Usually, Master FE's metadata is up to date. You can see the suffix of image.xxxx file in the `meta_dir/image` directory. The larger the number, the newer the metadata. - * Usually, by comparing all FOLLOWER FE image files, you can find the latest metadata. - * After that, we use the FE node with the latest metadata to recover. - * If using metadata of OBSERVER node to recover will be more troublesome, it is recommended to choose FOLLOWER node as far as possible. - -3. The following operations are performed on the FE nodes selected in step 2. - - 1. If the node is an OBSERVER, first change the `role=OBSERVER` in the `meta_dir/image/ROLE` file to `role=FOLLOWER`. (Recovery from the OBSERVER node will be more cumbersome, first follow the steps here, followed by a separate description) - 2. Add configuration in fe.conf: `metadata_failure_recovery=true`. - 3. Run `sh bin/start_fe.sh` to start the FE - 4. If normal, the FE will start in the role of MASTER, similar to the description in the previous section `Start a single node FE`. You should see the words `transfer from XXXX to MASTER` in fe.log. - 5. After the start-up is completed, connect to the FE first, and execute some query imports to check whether normal access is possible. If the operation is not normal, it may be wrong. It is recommended to read the above steps carefully and try again with the metadata previously backed up. If not, the problem may be more serious. - 6. If successful, through the `show frontends;` command, you should see all the FEs you added before, and the current FE is master. - 7. Delete the `metadata_failure_recovery=true` configuration item in fe.conf, or set it to `false`, and restart the FE (**Important**). - - - > If you are recovering metadata from an OBSERVER node, after completing the above steps, you will find that the current FE role is OBSERVER, but `IsMaster` appears as `true`. This is because the "OBSERVER" seen here is recorded in Doris's metadata, but whether it is master or not, is recorded in bdbje's metadata. Because we recovered from an OBSERVER node, there was inconsistency. Please take the following steps to fix this problem (we will fix it in a later version): - - > 1. First, all FE nodes except this "OBSERVER" are DROPed out. - > 2. A new FOLLOWER FE is added through the `ADD FOLLOWER` command, assuming that it is on hostA. - > 3. Start a new FE on hostA and join the cluster by `helper`. - > 4. After successful startup, you should see two FEs through the `show frontends;` statement, one is the previous OBSERVER, the other is the newly added FOLLOWER, and the OBSERVER is the master. - > 5. After confirming that the new FOLLOWER is working properly, the new FOLLOWER metadata is used to perform a failure recovery operation again. - > 6. The purpose of the above steps is to manufacture a metadata of FOLLOWER node artificially, and then use this metadata to restart fault recovery. This avoids inconsistencies in recovering metadata from OBSERVER. 
- - >The meaning of `metadata_failure_recovery = true` is to empty the metadata of `bdbje`. In this way, bdbje will not contact the other FEs it knew before, but will start as a standalone FE. This parameter only needs to be set to true for the recovery startup. After recovery, it must be set to false. Otherwise, once restarted, the metadata of bdbje will be emptied again, which will make the other FEs unable to work properly. - -4. After the successful execution of step 3, we delete the previous FEs from the metadata by using the `ALTER SYSTEM DROP FOLLOWER/OBSERVER` command and add them back as new FEs. - -5. If the above operations succeed, the cluster is restored. - -### FE type change - -If you need to change an existing FOLLOWER/OBSERVER type FE to OBSERVER/FOLLOWER type, please delete the FE in the way described above, and then add an FE of the desired type. - -### FE Migration - -If you need to migrate one FE from the current node to another, there are several scenarios. - -1. FOLLOWER or OBSERVER migration for non-MASTER nodes - - Add a new FOLLOWER/OBSERVER directly, then delete the old FOLLOWER/OBSERVER. - -2. Single-node MASTER migration - - When there is only one FE, refer to the `Failure Recovery` section: copy the palo-meta directory of the FE to the new node and start the new MASTER as in Step 3 of the `Failure Recovery` section. - -3. A set of FOLLOWERs migrates from one set of nodes to another set of new nodes - - Deploy FEs on the new nodes and add them as FOLLOWERs first. Then DROP the old nodes one by one. As the old nodes are dropped, the MASTER role will automatically move to one of the new FOLLOWER nodes. - -### Replacement of FE port - -FE currently has the following ports: - -* edit_log_port: bdbje's communication port -* http_port: http port, also used to push images -* rpc_port: thrift server port of Frontend -* query_port: MySQL connection port - -1. edit_log_port - - If this port needs to be replaced, it needs to be restored with reference to the operations in the `Failure Recovery` section. Because the port has been persisted into bdbje's own metadata (and also recorded in Doris's own metadata), it is necessary to clear bdbje's metadata by setting `metadata_failure_recovery=true`. - -2. http_port - - All FE http_ports must be consistent. So if you want to modify this port, all FEs need to be modified and restarted. Modifying this port is more complex in the case of multiple FOLLOWER deployments (it is a chicken-and-egg problem...), so this operation is not recommended. If necessary, follow the operation in the `Failure Recovery` section directly. - -3. rpc_port - - After modifying the configuration, restart FE directly. The Master FE informs BE of the new port through heartbeat. Only this port of the Master FE will be used. However, it is still recommended that all FE ports be consistent. - -4. query_port - - After modifying the configuration, restart FE directly. This only affects the MySQL connection target. - -### Recover metadata from FE memory -In some extreme cases, the image file on the disk may be damaged, but the metadata in memory is intact. At this point, we can dump the metadata from memory and replace the image file on the disk to recover the metadata. The entire operation does not require stopping query services. The steps are as follows: - -1. Stop all Load, Create, Alter operations. - -2. Execute the following command to dump metadata from the memory of the Master FE (the resulting file is hereafter called image_mem): -``` -curl -u $root_user:$password http://$master_hostname:8030/dump -``` -3.
Execute the following command to verify the integrity and correctness of the generated image_mem file: -``` -sh start_fe.sh --image path_to_image_mem -``` - -> note: `path_to_image_mem` is the path to the image_mem file. -> -> If the file is valid, the output will be `Load image success. Image file /absolute/path/to/image.xxxxxx valid`. -> -> If the file is invalid, the output will be `Load image failed. Image file /absolute/path/to/image.xxxxxx is invalid`. - -4. Replace the image file in the `meta_dir/image` directory on the OBSERVER/FOLLOWER FE node with the image_mem file in turn, restart the FOLLOWER FE node, and confirm that the metadata and query services are normal. - -5. Replace the image file in the `meta_dir/image` directory on the Master FE node with the image_mem file, restart the Master FE node, and then confirm that the FE Master switch is normal and The Master FE node can generate a new image file through checkpoint. - -6. Recover all Load, Create, Alter operations. - -**Note: If the Image file is large, the entire process can take a long time, so during this time, make sure Master FE does not generate a new image file via checkpoint. When the image.ckpt file in the meta_dir/image directory on the Master FE node is observed to be as large as the image.xxx file, the image.ckpt file can be deleted directly.** - -### View data in BDBJE - -The metadata log of FE is stored in BDBJE in the form of Key-Value. In some abnormal situations, FE may not be started due to metadata errors. In this case, Doris provides a way to help users query the data stored in BDBJE to facilitate troubleshooting. - -First, you need to add configuration in fe.conf: `enable_bdbje_debug_mode=true`, and then start FE through `sh start_fe.sh --daemon`. - -At this time, FE will enter the debug mode, only start the http server and MySQL server, and open the BDBJE instance, but will not load any metadata and other subsequent startup processes. - -This is, we can view the data stored in BDBJE by visiting the web page of FE, or after connecting to Doris through the MySQL client, through `show proc /bdbje;`. - -``` -mysql> show proc "/bdbje"; -+----------+---------------+---------+ -| DbNames | JournalNumber | Comment | -+----------+---------------+---------+ -| 110589 | 4273 | | -| epochDB | 4 | | -| metricDB | 430694 | | -+----------+---------------+---------+ -``` - -The first level directory will display all the database names in BDBJE and the number of entries in each database. - -``` -mysql> show proc "/bdbje/110589"; -+-----------+ -| JournalId | -+-----------+ -| 1 | -| 2 | - -... -| 114858 | -| 114859 | -| 114860 | -| 114861 | -+-----------+ -4273 rows in set (0.06 sec) -``` - -Entering the second level, all the entry keys under the specified database will be listed. - -``` -mysql> show proc "/bdbje/110589/114861"; -+-----------+--------------+---------------------------------------------+ -| JournalId | OpType | Data | -+-----------+--------------+---------------------------------------------+ -| 114861 | OP_HEARTBEAT | org.apache.doris.persist.HbPackage@6583d5fb | -+-----------+--------------+---------------------------------------------+ -1 row in set (0.05 sec) -``` - -The third level can display the value information of the specified key. - -## Best Practices - -The deployment recommendation of FE is described in the Installation and [Deployment Document](../../installing/install-deploy_EN.md). Here are some supplements. 
- -* **If you don't know the operation logic of FE metadata very well, or you don't have enough experience in the operation and maintenance of FE metadata, we strongly recommend that only one FOLLOWER-type FE be deployed as MASTER in practice, and that the other FEs are OBSERVERs, which can reduce many complex operation and maintenance problems.** Don't worry too much about the MASTER being a single point of failure for metadata writing. First, if you configure it properly, the FE Java process is very unlikely to crash. Secondly, if the MASTER disk is damaged (the probability is very low), we can also use the metadata on an OBSERVER to recover manually as described in the `Failure Recovery` section. - -* The JVM of the FE process must have sufficient memory. We **strongly recommend** that the FE's JVM memory be at least 10GB, with 32GB to 64GB recommended. Also deploy monitoring of the JVM memory usage, because if OOM occurs in FE, metadata writing may fail, resulting in failures that **cannot be recovered**! - -* FE nodes should have enough disk space so that accumulated metadata does not cause insufficient disk space. At the same time, FE logs also take up more than a dozen gigabytes of disk space. - -## Other common problems - -1. `meta out of date. current time: xxx, synchronized time: xxx, has log: xxx, fe type: xxx` appears in fe.log - - This is usually because the FE cannot elect a Master. For example, if three FOLLOWERs are configured, but only one FOLLOWER is started, this FOLLOWER will report this problem. Usually, starting the remaining FOLLOWERs solves it. If the problem has not been solved after they are started, manual recovery may be required in accordance with the `Failure Recovery` section. - -2. `Clock delta: xxxx ms. between Feeder: xxxx and this Replica exceeds max permissible delta: xxxx ms.` - - bdbje requires that the clock skew between nodes not exceed a certain threshold. If it is exceeded, the node will exit abnormally. The default threshold is 5000ms, which is controlled by the FE parameter `max_bdbje_clock_delta_ms` and can be modified as appropriate. But we suggest using NTP or other clock synchronization methods to keep the clocks of the Doris cluster hosts synchronized. - - -3. Mirror files in the `image/` directory have not been updated for a long time - - Master FE generates a mirror file by default for every 50,000 metadata journals. In a frequently used cluster, a new image file is usually generated every half day to every few days. If you find that the image file has not been updated for a long time (for example, more than a week), you can investigate the causes in the following order: - - 1. Search for `memory is not enough to do checkpoint. Committed memroy XXXX Bytes, used memory XXXX Bytes. ` in the fe.log of Master FE. If found, it indicates that the current FE's JVM memory is insufficient for image generation (usually we need to reserve half of the FE memory for image generation). You then need to increase the JVM memory and restart the FE before observing further. Each time Master FE restarts, a new image is generated directly. This restart method can also be used to actively generate a new image. Note that if there are multiple FOLLOWER deployments, then when you restart the current Master FE, another FOLLOWER FE will become MASTER, and subsequent image generation will be the responsibility of the new Master. Therefore, you may need to modify the JVM memory configuration of all FOLLOWER FEs. - - 2. Search for `begin to generate new image: image.xxxx` in the fe.log of Master FE. If it is found, then an image is being generated.
Check the subsequent log of this thread, and if `checkpoint finished save image.xxxx` appears, the image is written successfully. If `Exception when generating new image file` occurs, the generation fails and the specific error messages need to be checked. - - -4. The size of the `bdb/` directory is very large, reaching several GB or more. - - If the `bdb/` directory remains large for some time after the problem of not being able to generate new images has been ruled out, it may be because the Master FE failed to push images. You can search for `push image.XXXX to other nodes. totally XX nodes, push successed YY nodes` in the fe.log of Master FE. If YY is smaller than XX, then some FEs were not pushed successfully. You can see the specific error `Exception when pushing image file.url = xxx` in the fe.log. - - At the same time, you can add the configuration `edit_log_roll_num = xxxx` in the FE configuration file. This parameter sets how many metadata journals trigger the generation of an image. The default is 50000. This number can be reduced appropriately to make images more frequent, thus speeding up the deletion of old journals. - -5. FOLLOWER FEs hang up one after another - - Because Doris's metadata adopts a majority writing strategy, a metadata journal must be written to a majority of FOLLOWER FEs (for example, with three FOLLOWERs, two must be written successfully) before the write can be considered successful. If the write fails, the FE process exits on its own initiative. So suppose there are three FOLLOWERs: A, B and C. If C hangs up first, and then B hangs up, then A will also hang up. So, as described in the `Best Practices` section, if you don't have extensive experience in metadata operations and maintenance, it's not recommended to deploy multiple FOLLOWERs. - -6. `get exception when try to close previously opened bdb database. ignore it` appears in fe.log - - If the message ends with `ignore it`, there is usually no need to deal with it. If you are interested, you can search for this error in `BDBEnvironment.java` and read the comments there. - -7. In the output of `show frontends;`, the `Join` column of an FE is `true`, but the FE is actually abnormal. - - In `show frontends;`, if the `Join` column is `true`, it only means that the FE **has joined** the cluster at some point. It does not mean that it is still working normally in the cluster. If it is `false`, it means that the FE **has never joined** the cluster. - -8. Configuration of FE `master_sync_policy`, `replica_sync_policy`, and `txn_rollback_limit` - - `master_sync_policy` specifies whether fsync() is called when the Leader FE writes the metadata log, and `replica_sync_policy` specifies whether the other Follower FEs call fsync() when synchronizing metadata in an FE HA deployment. In earlier versions of Doris, these two parameters defaulted to `WRITE_NO_SYNC`, i.e., fsync() was not called. In the latest version of Doris, the default has been changed to `SYNC`, that is, fsync() is called. Calling fsync() significantly reduces the efficiency of metadata disk writing. In some environments, IOPS may drop to several hundred and the latency may increase to 2-3ms (but this is still enough for Doris metadata manipulation). Therefore, we recommend the following configuration: - - 1. For a single Follower FE deployment, `master_sync_policy` is set to `SYNC`, which prevents the loss of metadata due to the downtime of the FE system. - 2.
For a multi-Follower FE deployment, we can set `master_sync_policy` and `replica_sync_policy` to `WRITE_NO_SYNC`, because we think that the probability of multiple systems going down at the same time is very low. - - If `master_sync_policy` is set to `WRITE_NO_SYNC` in a single Follower FE deployment, then an FE system outage may result in loss of metadata. At this point, if another Observer FE attempts to restart, it may report an error: - - ``` - Node xxx must rollback xx total commits(numPassedDurableCommits of which were durable) to the earliest point indicated by transaction xxxx in order to rejoin the replication group, but the transaction rollback limit of xxx prohibits this. - ``` - -This means that some transactions that have been persisted need to be rolled back, but the number of entries exceeds the upper limit. Our default upper limit here is 100, which can be changed by setting `txn_rollback_limit`. This operation is only used to attempt to start the FE normally; the lost metadata cannot be recovered. diff --git a/docs/en/administrator-guide/operation/tablet-meta-tool.md b/docs/en/administrator-guide/operation/tablet-meta-tool.md deleted file mode 100644 index 94badd114b..0000000000 --- a/docs/en/administrator-guide/operation/tablet-meta-tool.md +++ /dev/null @@ -1,145 +0,0 @@ ---- -{ - "title": "Tablet metadata management tool", - "language": "en" -} ---- - - - -# Tablet metadata management tool - -## Background - -In the latest version of the code, we introduced RocksDB in BE to store the meta-information of tablets, in order to solve various functional and performance problems caused by storing meta-information in header files. Currently, each data directory (root path) has a corresponding RocksDB instance, in which all tablets on the corresponding root path are stored in key-value form. - -To facilitate the maintenance of this metadata, we provide an online HTTP interface and an offline meta_tool to complete related management operations. - -The HTTP interface is only used to view tablet metadata online, and can be used while the BE process is running. - -The meta_tool, however, is only used for offline metadata management operations, and the BE must be stopped before it can be used. - -The meta_tool is stored in the `lib/` directory of the BE. - -## Operation - -### View Tablet Meta - -Viewing Tablet Meta information can be done in an online or an offline way. - -#### Online - -Access BE's HTTP interface to obtain the corresponding Tablet Meta information: - -API: - -`http://{host}:{port}/api/meta/header/{tablet_id}` - - -> host: BE hostname -> -> port: BE's HTTP port -> -> tablet_id: tablet id - -For example: - -`http://be_host:8040/api/meta/header/14156` - -If the query is successful, the Tablet Meta will be returned as JSON. - -#### Offline - -Use the meta_tool to get the Tablet Meta from a disk. - -Command: - -``` -./lib/meta_tool --root_path=/path/to/root_path --operation=get_meta --tablet_id=xxx --schema_hash=xxx -``` - -> root_path: the corresponding root_path configured in be.conf. - -The result is also the Tablet Meta in JSON format. - -### Load header - -The load header function is provided to implement manual migration of a tablet. This function is based on the Tablet Meta in JSON format, so if changes in the shard field and version information are involved, they can be made directly in the JSON content of the Tablet Meta. Then use the following command to load it.
- -Command: - -``` -./lib/meta_tool --operation=load_meta --root_path=/path/to/root_path --json_header_path=path -``` - -### Delete header - -In order to realize the function of deleting a tablet meta from a disk of a BE. Support single delete and batch delete. - -Single delete: - -``` -./lib/meta_tool --operation=delete_meta --root_path=/path/to/root_path --tablet_id=xxx --schema_hash=xxx` -``` - -Batch delete: - -``` -./lib/meta_tool --operation=batch_delete_meta --tablet_file=/path/to/tablet_file.txt -``` - -Each line in `tablet_file.txt` represents the information of a tablet. The format is: - -`root_path,tablet_id,schema_hash` - -Each column are separated by comma. - -`tablet_file` example: - -``` -/output/be/data/,14217,352781111 -/output/be/data/,14219,352781111 -/output/be/data/,14223,352781111 -/output/be/data/,14227,352781111 -/output/be/data/,14233,352781111 -/output/be/data/,14239,352781111 -``` - -Batch delete will skip the line with incorrect tablet information format in `tablet_file`. And after the execution is completed, the number of successful deletions and the number of errors are displayed. - -### TabletMeta in Pb format - -This command is to view the old file-based management PB format Tablet Meta, and to display Tablet Meta in JSON format. - -Command: - -``` -./lib/meta_tool --operation=show_meta --root_path=/path/to/root_path --pb_header_path=path -``` - -### Segment meta in Pb format - -This command is to view the PB format segment meta, and to display segment meta in JSON format. - -Command: - -``` -./meta_tool --operation=show_segment_footer --file=/path/to/segment/file -``` diff --git a/docs/en/administrator-guide/operation/tablet-repair-and-balance.md b/docs/en/administrator-guide/operation/tablet-repair-and-balance.md deleted file mode 100644 index f4f3e21965..0000000000 --- a/docs/en/administrator-guide/operation/tablet-repair-and-balance.md +++ /dev/null @@ -1,774 +0,0 @@ ---- -{ - "title": "Data replica management", - "language": "en" -} ---- - - - -# Data replica management - -Beginning with version 0.9.0, Doris introduced an optimized replica management strategy and supported a richer replica status viewing tool. This document focuses on Doris data replica balancing, repair scheduling strategies, and replica management operations and maintenance methods. Help users to more easily master and manage the replica status in the cluster. - -> Repairing and balancing copies of tables with Collocation attributes can be referred to `docs/documentation/cn/administrator-guide/colocation-join.md'.` - -## Noun Interpretation - -1. Tablet: The logical fragmentation of a Doris table, where a table has multiple fragmentations. -2. Replica: A sliced copy, defaulting to three copies of a slice. -3. Healthy Replica: A healthy copy that survives at Backend and has a complete version. -4. Tablet Checker (TC): A resident background thread that scans all Tablets regularly, checks the status of these Tablets, and decides whether to send them to Tablet Scheduler based on the results. -5. Tablet Scheduler (TS): A resident background thread that handles Tablets sent by Tablet Checker that need to be repaired. At the same time, cluster replica balancing will be carried out. -6. Tablet SchedCtx (TSC): is a tablet encapsulation. When TC chooses a tablet, it encapsulates it as a TSC and sends it to TS. -7. Storage Medium: Storage medium. Doris supports specifying different storage media for partition granularity, including SSD and HDD. 
The replica scheduling strategy is also scheduled for different storage media. - -``` - - +--------+ +-----------+ - | Meta | | Backends | - +---^----+ +------^----+ - | | | 3. Send clone tasks - 1. Check tablets | | | - +--------v------+ +-----------------+ - | TabletChecker +--------> TabletScheduler | - +---------------+ +-----------------+ - 2. Waiting to be scheduled - - -``` -The figure above is a simplified workflow. - - -## Duplicate status - -Multiple copies of a Tablet may cause state inconsistencies due to certain circumstances. Doris will attempt to automatically fix the inconsistent copies of these states so that the cluster can recover from the wrong state as soon as possible. - -**The health status of a Replica is as follows:** - -1. BAD - - That is, the copy is damaged. Includes, but is not limited to, the irrecoverable damaged status of copies caused by disk failures, BUGs, etc. - -2. VERSION\_MISSING - - Version missing. Each batch of imports in Doris corresponds to a data version. A copy of the data consists of several consecutive versions. However, due to import errors, delays and other reasons, the data version of some copies may be incomplete. - -3. HEALTHY - - Health copy. That is, a copy of the normal data, and the BE node where the copy is located is in a normal state (heartbeat is normal and not in the offline process). - -**The health status of a Tablet is determined by the status of all its copies. There are the following categories:** - -1. REPLICA\_MISSING - - The copy is missing. That is, the number of surviving copies is less than the expected number of copies. - -2. VERSION\_INCOMPLETE - - The number of surviving copies is greater than or equal to the number of expected copies, but the number of healthy copies is less than the number of expected copies. - -3. REPLICA\_RELOCATING - - Have a full number of live copies of the replication num version, but the BE nodes where some copies are located are in unavailable state (such as decommission) - -4. REPLICA\_MISSING\_IN\_CLUSTER - - When using multi-cluster, the number of healthy replicas is greater than or equal to the expected number of replicas, but the number of replicas in the corresponding cluster is less than the expected number of replicas. - -5. REDUNDANT - - Duplicate redundancy. Healthy replicas are in the corresponding cluster, but the number of replicas is larger than the expected number. Or there's a spare copy of unavailable. - -6. FORCE\_REDUNDANT - - This is a special state. It only occurs when the number of expected replicas is greater than or equal to the number of available nodes, and when the Tablet is in the state of replica missing. In this case, you need to delete a copy first to ensure that there are available nodes for creating a new copy. - -7. COLOCATE\_MISMATCH - - Fragmentation status of tables for Collocation attributes. Represents that the distribution of fragmented copies is inconsistent with the specified distribution of Colocation Group. - -8. COLOCATE\_REDUNDANT - - Fragmentation status of tables for Collocation attributes. Represents the fragmented copy redundancy of the Colocation table. - -8. HEALTHY - - Healthy fragmentation, that is, conditions [1-5] are not satisfied. - -## Replica Repair - -As a resident background process, Tablet Checker regularly checks the status of all fragments. For unhealthy fragmentation, it will be sent to Tablet Scheduler for scheduling and repair. The actual operation of repair is accomplished by clone task on BE. 
FE is only responsible for generating these clone tasks. - -> Note 1: The main idea of replica repair is to make the number of fragmented replicas reach the desired value by creating or completing them first. Then delete the redundant copy. -> -> Note 2: A clone task is to complete the process of copying specified data from a specified remote end to a specified destination. - -For different states, we adopt different repair methods: - -1. REPLICA\_MISSING/REPLICA\_RELOCATING - - Select a low-load, available BE node as the destination. Choose a healthy copy as the source. Clone tasks copy a complete copy from the source to the destination. For replica completion, we will directly select an available BE node, regardless of the storage medium. - -2. VERSION\_INCOMPLETE - - Select a relatively complete copy as the destination. Choose a healthy copy as the source. The clone task attempts to copy the missing version from the source to the destination. - -3. REPLICA\_MISSING\_IN\_CLUSTER - - This state processing method is the same as REPLICAMISSING. - -4. REDUNDANT - - Usually, after repair, there will be redundant copies in fragmentation. We select a redundant copy to delete it. The selection of redundant copies follows the following priorities: - 1. The BE where the copy is located has been offline. - 2. The copy is damaged - 3. The copy is lost in BE or offline - 4. The replica is in the CLONE state (which is an intermediate state during clone task execution) - 5. The copy has version missing - 6. The cluster where the copy is located is incorrect - 7. The BE node where the replica is located has a high load - -5. FORCE\_REDUNDANT - - Unlike REDUNDANT, because at this point Tablet has a copy missing, because there are no additional available nodes for creating new copies. So at this point, a copy must be deleted to free up a available node for creating a new copy. - The order of deleting copies is the same as REDUNDANT. - -6. COLOCATE\_MISMATCH - - Select one of the replica distribution BE nodes specified in Colocation Group as the destination node for replica completion. - -7. COLOCATE\_REDUNDANT - - Delete a copy on a BE node that is distributed by a copy specified in a non-Colocation Group. - - Doris does not deploy a copy of the same Tablet on a different BE of the same host when selecting a replica node. It ensures that even if all BEs on the same host are deactivated, all copies will not be lost. - -### Scheduling priority - -Waiting for the scheduled fragments in Tablet Scheduler gives different priorities depending on the status. High priority fragments will be scheduled first. There are currently several priorities. - -1. VERY\_HIGH - - * REDUNDANT. For slices with duplicate redundancy, we give priority to them. Logically, duplicate redundancy is the least urgent, but because it is the fastest to handle and can quickly release resources (such as disk space, etc.), we give priority to it. - * FORCE\_REDUNDANT. Ditto. - -2. HIGH - - * REPLICA\_MISSING and most copies are missing (for example, 2 copies are missing in 3 copies) - * VERSION\_INCOMPLETE and most copies are missing - * COLOCATE\_MISMATCH We hope that the fragmentation related to the Collocation table can be repaired as soon as possible. - * COLOCATE\_REDUNDANT - -3. NORMAL - - * REPLICA\_MISSING, but most survive (for example, three copies lost one) - * VERSION\_INCOMPLETE, but most copies are complete - * REPLICA\_RELOCATING and relocate is required for most replicas (e.g. 3 replicas with 2 replicas) - -4. 
LOW - - * REPLICA\_MISSING\_IN\_CLUSTER - * REPLICA\_RELOCATING most copies stable - -### Manual priority - -The system will automatically determine the scheduling priority. Sometimes, however, users want the fragmentation of some tables or partitions to be repaired faster. So we provide a command that the user can specify that a slice of a table or partition is repaired first: - -`ADMIN REPAIR TABLE tbl [PARTITION (p1, p2, ...)];` - -This command tells TC to give VERY HIGH priority to the problematic tables or partitions that need to be repaired first when scanning Tablets. - -> Note: This command is only a hint, which does not guarantee that the repair will be successful, and the priority will change with the scheduling of TS. And when Master FE switches or restarts, this information will be lost. - -Priority can be cancelled by the following commands: - -`ADMIN CANCEL REPAIR TABLE tbl [PARTITION (p1, p2, ...)];` - -### Priority scheduling - -Priority ensures that severely damaged fragments can be repaired first, and improves system availability. But if the high priority repair task fails all the time, the low priority task will never be scheduled. Therefore, we will dynamically adjust the priority of tasks according to the running status of tasks, so as to ensure that all tasks have the opportunity to be scheduled. - -* If the scheduling fails for five consecutive times (e.g., no resources can be obtained, no suitable source or destination can be found, etc.), the priority will be lowered. -* If not scheduled for 30 minutes, priority will be raised. -* The priority of the same tablet task is adjusted at least five minutes apart. - -At the same time, in order to ensure the weight of the initial priority, we stipulate that the initial priority is VERY HIGH, and the lowest is lowered to NORMAL. When the initial priority is LOW, it is raised to HIGH at most. The priority adjustment here also adjusts the priority set manually by the user. - -## Duplicate Equilibrium - -Doris automatically balances replicas within the cluster. Currently supports two rebalance strategies, BeLoad and Partition. BeLoad rebalance will consider about the disk usage and replica count for each BE. Partition rebalance just aim at replica count for each partition, this helps to avoid hot spots. If you want high read/write performance, you may need this. Note that Partition rebalance do not consider about the disk usage, pay more attention to it when you are using Partition rebalance. The strategy selection config is not mutable at runtime. - -### BeLoad - -The main idea of balancing is to create a replica of some fragments on low-load nodes, and then delete the replicas of these fragments on high-load nodes. At the same time, because of the existence of different storage media, there may or may not exist one or two storage media on different BE nodes in the same cluster. We require that fragments of storage medium A be stored in storage medium A as far as possible after equalization. So we divide the BE nodes of the cluster according to the storage medium. Then load balancing scheduling is carried out for different BE node sets of storage media. - -Similarly, replica balancing ensures that a copy of the same table will not be deployed on the BE of the same host. - -### BE Node Load - -We use Cluster LoadStatistics (CLS) to represent the load balancing of each backend in a cluster. Tablet Scheduler triggers cluster equilibrium based on this statistic. 
We currently calculate a load Score for each BE as the BE load score by using **disk usage** and **number of copies**. The higher the score, the heavier the load on the BE. - -Disk usage and number of copies have a weight factor, which is **capacityCoefficient** and **replicaNumCoefficient**, respectively. The sum of them is **constant to 1**. Among them, capacityCoefficient will dynamically adjust according to actual disk utilization. When the overall disk utilization of a BE is below 50%, the capacityCoefficient value is 0.5, and if the disk utilization is above 75% (configurable through the FE configuration item `capacity_used_percent_high_water`), the value is 1. If the utilization rate is between 50% and 75%, the weight coefficient increases smoothly. The formula is as follows: - -`capacityCoefficient = 2 * Disk Utilization - 0.5` - -The weight coefficient ensures that when disk utilization is too high, the backend load score will be higher to ensure that the BE load is reduced as soon as possible. - -Tablet Scheduler updates CLS every 20 seconds. - -### Partition - -The main idea of `partition rebalancing` is to decrease the skew of partitions. The skew of the partition is defined as the difference between the maximum replica count of the partition over all bes and the minimum replica count over all bes. - -So we only consider about the replica count, do not consider replica size(disk usage). -To fewer moves, we use TwoDimensionalGreedyAlgo which two dims are cluster & partition. It prefers a move that reduce the skew of the cluster when we want to rebalance a max skew partition. - -#### Skew Info - -The skew info is represented by `ClusterBalanceInfo`. `partitionInfoBySkew` is a multimap which key is the partition's skew, so we can get max skew partitions simply. `beByTotalReplicaCount` is a multimap which key is the total replica count of the backend. - -`ClusterBalanceInfo` is in CLS, updated every 20 seconds. - -When get more than one max skew partitions, we random select one partition to calculate the move. - -### Equilibrium strategy - -Tablet Scheduler uses Load Balancer to select a certain number of healthy fragments as candidate fragments for balance in each round of scheduling. In the next scheduling, balanced scheduling will be attempted based on these candidate fragments. - -## Resource control - -Both replica repair and balancing are accomplished by replica copies between BEs. If the same BE performs too many tasks at the same time, it will bring a lot of IO pressure. Therefore, Doris controls the number of tasks that can be performed on each node during scheduling. The smallest resource control unit is the disk (that is, a data path specified in be.conf). By default, we configure two slots per disk for replica repair. A clone task occupies one slot at the source and one slot at the destination. If the number of slots is zero, no more tasks will be assigned to this disk. The number of slots can be configured by FE's `schedule_slot_num_per_path` parameter. - -In addition, by default, we provide two separate slots per disk for balancing tasks. The purpose is to prevent high-load nodes from losing space by balancing because slots are occupied by repair tasks. - -## Duplicate Status View - -Duplicate status view mainly looks at the status of the duplicate, as well as the status of the duplicate repair and balancing tasks. Most of these states **exist only in** Master FE nodes. Therefore, the following commands need to be executed directly to Master FE. 
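- Since these states exist only on the Master FE, first confirm that your client is connected to the Master before running the commands below. A minimal check (reusing the `show frontends;` statement described earlier; the exact column layout may differ between versions): - - ``` - mysql> show frontends; - -- the FE whose `IsMaster` column is `true` is the Master; run the SHOW PROC commands in this section against it - ``` -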
- -### Duplicate status - -1. Global state checking - - Through `SHOW PROC'/ statistic'; `commands can view the replica status of the entire cluster. - - ``` - +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+ - | DbId | DbName | TableNum | PartitionNum | IndexNum | TabletNum | ReplicaNum | UnhealthyTabletNum | InconsistentTabletNum | - +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+ - | 35153636 | default_cluster:DF_Newrisk | 3 | 3 | 3 | 96 | 288 | 0 | 0 | - | 48297972 | default_cluster:PaperData | 0 | 0 | 0 | 0 | 0 | 0 | 0 | - | 5909381 | default_cluster:UM_TEST | 7 | 7 | 10 | 320 | 960 | 1 | 0 | - | Total | 240 | 10 | 10 | 13 | 416 | 1248 | 1 | 0 | - +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+ - ``` - - The `UnhealthyTabletNum` column shows how many Tablets are in an unhealthy state in the corresponding database. `The Inconsistent Tablet Num` column shows how many Tablets are in an inconsistent replica state in the corresponding database. The last `Total` line counts the entire cluster. Normally `Unhealth Tablet Num` and `Inconsistent Tablet Num` should be 0. If it's not zero, you can further see which Tablets are there. As shown in the figure above, one table in the UM_TEST database is not healthy, you can use the following command to see which one is. - - `SHOW PROC '/statistic/5909381';` - - Among them `5909381'is the corresponding DbId. - - ``` - +------------------+---------------------+ - | UnhealthyTablets | InconsistentTablets | - +------------------+---------------------+ - | [40467980] | [] | - +------------------+---------------------+ - ``` - - The figure above shows the specific unhealthy Tablet ID (40467980). Later we'll show you how to view the status of each copy of a specific Tablet. - -2. Table (partition) level status checking - - Users can view the status of a copy of a specified table or partition through the following commands and filter the status through a WHERE statement. 
If you look at table tbl1, the state on partitions P1 and P2 is a copy of OK: - - `ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2) WHERE STATUS = "OK";` - - ``` - +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+ - | TabletId | ReplicaId | BackendId | Version | LastFailedVersion | LastSuccessVersion | CommittedVersion | SchemaHash | VersionNum | IsBad | State | Status | - +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+ - | 29502429 | 29502432 | 10006 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502429 | 36885996 | 10002 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502429 | 48100551 | 10007 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502433 | 29502434 | 10001 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502433 | 44900737 | 10004 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502433 | 48369135 | 10006 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | - +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+ - ``` - - The status of all copies is shown here. Where `IsBad` is listed as `true`, the copy is damaged. The `Status` column displays other states. Specific status description, you can see help through `HELP ADMIN SHOW REPLICA STATUS`. - - ` The ADMIN SHOW REPLICA STATUS `command is mainly used to view the health status of copies. Users can also view additional information about copies of a specified table by using the following commands: - - `SHOW TABLETS FROM tbl1;` - - ``` - +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+ - | TabletId | ReplicaId | BackendId | SchemaHash | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | DataSize | RowCount | State | LstConsistencyCheckTime | CheckVersion | CheckVersionHash | VersionCount | PathHash | MetaUrl | CompactionStatus | - +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+ - | 29502429 | 29502432 | 10006 | 1421156361 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -5822326203532286804 | url | url | - | 29502429 | 36885996 | 10002 | 1421156361 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -1441285706148429853 | url | url | - | 29502429 | 48100551 | 10007 | 1421156361 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -4784691547051455525 | url | url | - 
+----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+ - ``` - - The figure above shows some additional information, including copy size, number of rows, number of versions, where the data path is located. - - > Note: The contents of the `State` column shown here do not represent the health status of the replica, but the status of the replica under certain tasks, such as CLONE, SCHEMA CHANGE, ROLLUP, etc. - - In addition, users can check the distribution of replicas in a specified table or partition by following commands. - - `ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1;` - - ``` - +-----------+------------+-------+---------+ - | BackendId | ReplicaNum | Graph | Percent | - +-----------+------------+-------+---------+ - | 10000 | 7 | | 7.29 % | - | 10001 | 9 | | 9.38 % | - | 10002 | 7 | | 7.29 % | - | 10003 | 7 | | 7.29 % | - | 10004 | 9 | | 9.38 % | - | 10005 | 11 | > | 11.46 % | - | 10006 | 18 | > | 18.75 % | - | 10007 | 15 | > | 15.62 % | - | 10008 | 13 | > | 13.54 % | - +-----------+------------+-------+---------+ - ``` - - Here we show the number and percentage of replicas of table tbl1 on each BE node, as well as a simple graphical display. - -4. Tablet level status checking - - When we want to locate a specific Tablet, we can use the following command to view the status of a specific Tablet. For example, check the tablet with ID 2950253: - - `SHOW TABLET 29502553;` - - ``` - +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+ - | DbName | TableName | PartitionName | IndexName | DbId | TableId | PartitionId | IndexId | IsSync | DetailCmd | - +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+ - | default_cluster:test | test | test | test | 29502391 | 29502428 | 29502427 | 29502428 | true | SHOW PROC '/dbs/29502391/29502428/partitions/29502427/29502428/29502553'; | - +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+ - ``` - - The figure above shows the database, tables, partitions, roll-up tables and other information corresponding to this tablet. 
The user can copy the command in the `DetailCmd` command to continue executing: - - `Show Proc'/DBS/29502391/29502428/Partitions/29502427/29502428/29502553;` - - ``` - +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+ - | ReplicaId | BackendId | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | SchemaHash | DataSize | RowCount | State | IsBad | VersionCount | PathHash | - +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+ - | 43734060 | 10004 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | -8566523878520798656 | - | 29502555 | 10002 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1885826196444191611 | - | 39279319 | 10007 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1656508631294397870 | - +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+ - ``` - - The figure above shows all replicas of the corresponding Tablet. The content shown here is the same as `SHOW TABLET FROM tbl1;`. But here you can clearly see the status of all copies of a specific Tablet. - -### Duplicate Scheduling Task - -1. View tasks waiting to be scheduled - - `SHOW PROC '/cluster_balance/pending_tablets';` - - ``` - +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+ - | TabletId | Type | Status | State | OrigPrio | DynmPrio | SrcBe | SrcPath | DestBe | DestPath | Timeout | Create | LstSched | LstVisit | Finished | Rate | FailedSched | FailedRunning | LstAdjPrio | VisibleVer | VisibleVerHash | CmtVer | CmtVerHash | ErrMsg | - +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+ - | 4203036 | REPAIR | REPLICA_MISSING | PENDING | HIGH | LOW | -1 | -1 | -1 | -1 | 0 | 2019-02-21 15:00:20 | 2019-02-24 11:18:41 | 2019-02-24 11:18:41 | N/A | N/A | 2 | 0 | 2019-02-21 15:00:43 | 1 | 0 | 2 | 0 | unable to find source replica | - +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+ - ``` - - The specific meanings of each column are as follows: - - * TabletId: The ID of the Tablet waiting to be scheduled. 
Each scheduling task handles exactly one Tablet - * Type: Task type, which can be REPAIR (repair) or BALANCE (balance) - * Status: The current status of the Tablet, such as REPLICA_MISSING (a replica is missing) - * State: The status of the scheduling task, which may be PENDING/RUNNING/FINISHED/CANCELLED/TIMEOUT/UNEXPECTED - * OrigPrio: Initial priority - * DynmPrio: Current dynamically adjusted priority - * SrcBe: ID of the source BE node - * SrcPath: Hash value of the path on the source BE node - * DestBe: ID of the destination BE node - * DestPath: Hash value of the path on the destination BE node - * Timeout: Once the task is scheduled successfully, the task timeout is displayed here, in seconds - * Create: The time when the task was created - * LstSched: The last time the task was scheduled - * LstVisit: The last time the task was accessed. Here "accessed" refers to the processing time points associated with the task, including scheduling, task execution reporting, and so on. - * Finished: Task end time - * Rate: Data copy rate of the clone task - * FailedSched: Number of task scheduling failures - * FailedRunning: Number of task execution failures - * LstAdjPrio: Time of the last priority adjustment - * CmtVer/CmtVerHash/VisibleVer/VisibleVerHash: Version information for the clone task - * ErrMsg: Error messages that occurred while the task was scheduled and run - -2. View running tasks - - `SHOW PROC '/cluster_balance/running_tablets';` - - The columns in the result have the same meaning as for `pending_tablets`. - -3. View completed tasks - - `SHOW PROC '/cluster_balance/history_tablets';` - - By default, only the last 1,000 completed tasks are kept. The columns in the result have the same meaning as for `pending_tablets`. If the `State` column shows `FINISHED`, the task completed normally; otherwise, the `ErrMsg` column gives the specific reason. - -## Viewing Cluster Load and Scheduling Resources - -1. 
Cluster load - - You can view the current load of the cluster by following commands: - - `SHOW PROC '/cluster_balance/cluster_load_stat';` - - First of all, we can see the division of different storage media: - - ``` - +---------------+ - | StorageMedium | - +---------------+ - | HDD | - | SSD | - +---------------+ - ``` - - Click on a storage medium to see the equilibrium state of the BE node that contains the storage medium: - - `SHOW PROC '/cluster_balance/cluster_load_stat/HDD';` - - ``` - +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+ - | BeId | Cluster | Available | UsedCapacity | Capacity | UsedPercent | ReplicaNum | CapCoeff | ReplCoeff | Score | Class | - +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+ - | 10003 | default_cluster | true | 3477875259079 | 19377459077121 | 17.948 | 493477 | 0.5 | 0.5 | 0.9284678149967587 | MID | - | 10002 | default_cluster | true | 3607326225443 | 19377459077121 | 18.616 | 496928 | 0.5 | 0.5 | 0.948660871419998 | MID | - | 10005 | default_cluster | true | 3523518578241 | 19377459077121 | 18.184 | 545331 | 0.5 | 0.5 | 0.9843539990641831 | MID | - | 10001 | default_cluster | true | 3535547090016 | 19377459077121 | 18.246 | 558067 | 0.5 | 0.5 | 0.9981869446537612 | MID | - | 10006 | default_cluster | true | 3636050364835 | 19377459077121 | 18.764 | 547543 | 0.5 | 0.5 | 1.0011489897614072 | MID | - | 10004 | default_cluster | true | 3506558163744 | 15501967261697 | 22.620 | 468957 | 0.5 | 0.5 | 1.0228319835582569 | MID | - | 10007 | default_cluster | true | 4036460478905 | 19377459077121 | 20.831 | 551645 | 0.5 | 0.5 | 1.057279369420761 | MID | - | 10000 | default_cluster | true | 4369719923760 | 19377459077121 | 22.551 | 547175 | 0.5 | 0.5 | 1.0964036415787461 | MID | - +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+ - ``` - - Some of these columns have the following meanings: - - * Available: True means that BE heartbeat is normal and not offline. - * UsedCapacity: Bytes, the size of disk space used on BE - * Capacity: Bytes, the total disk space size on BE - * UsedPercent: Percentage, disk space utilization on BE - * ReplicaNum: Number of copies on BE - * CapCoeff/ReplCoeff: Weight Coefficient of Disk Space and Copy Number - * Score: Load score. The higher the score, the heavier the load. - * Class: Classified by load, LOW/MID/HIGH. 
Balance scheduling moves replicas from high-load nodes to low-load nodes - - Users can further view the utilization of each path on a BE, for example the BE with ID 10001: - - `SHOW PROC '/cluster_balance/cluster_load_stat/HDD/10001';` - - ``` - +------------------+------------------+---------------+---------------+---------+--------+----------------------+ - | RootPath | DataUsedCapacity | AvailCapacity | TotalCapacity | UsedPct | State | PathHash | - +------------------+------------------+---------------+---------------+---------+--------+----------------------+ - | /home/disk4/palo | 498.757 GB | 3.033 TB | 3.525 TB | 13.94 % | ONLINE | 4883406271918338267 | - | /home/disk3/palo | 704.200 GB | 2.832 TB | 3.525 TB | 19.65 % | ONLINE | -5467083960906519443 | - | /home/disk1/palo | 512.833 GB | 3.007 TB | 3.525 TB | 14.69 % | ONLINE | -7733211489989964053 | - | /home/disk2/palo | 881.955 GB | 2.656 TB | 3.525 TB | 24.65 % | ONLINE | 4870995507205544622 | - | /home/disk5/palo | 694.992 GB | 2.842 TB | 3.525 TB | 19.36 % | ONLINE | 1916696897889786739 | - +------------------+------------------+---------------+---------------+---------+--------+----------------------+ - ``` - - The disk usage of each data path on the specified BE is shown here. - -2. Scheduling resources - - Users can view the current slot usage of each node with the following command: - - `SHOW PROC '/cluster_balance/working_slots';` - - ``` - +----------+----------------------+------------+------------+-------------+----------------------+ - | BeId | PathHash | AvailSlots | TotalSlots | BalanceSlot | AvgRate | - +----------+----------------------+------------+------------+-------------+----------------------+ - | 10000 | 8110346074333016794 | 2 | 2 | 2 | 2.459007474009069E7 | - | 10000 | -5617618290584731137 | 2 | 2 | 2 | 2.4730105014001578E7 | - | 10001 | 4883406271918338267 | 2 | 2 | 2 | 1.6711402709780257E7 | - | 10001 | -5467083960906519443 | 2 | 2 | 2 | 2.7540126380326536E7 | - | 10002 | 9137404661108133814 | 2 | 2 | 2 | 2.417217089806745E7 | - | 10002 | 1885826196444191611 | 2 | 2 | 2 | 1.6327378456676323E7 | - +----------+----------------------+------------+------------+-------------+----------------------+ - ``` - - Slot usage is shown here at the granularity of the data path. `AvgRate` is the historical average copy rate of clone tasks on this path, in bytes/second. - -3. Priority repair view - - The following command shows the tables or partitions that were marked for priority repair with the `ADMIN REPAIR TABLE` command (a usage sketch follows below). - - `SHOW PROC '/cluster_balance/priority_repair';` - - The `RemainingTimeMs` column indicates how long these entries remain in the priority repair queue before they are removed automatically, which prevents entries that cannot be repaired from occupying resources indefinitely.
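 - - As a minimal usage sketch (the table name `tbl1` and partition `p1` are hypothetical placeholders), marking a partition for priority repair and then checking the queue could look like this: - - ``` - -- ask the scheduler to repair the replicas of this partition first - ADMIN REPAIR TABLE tbl1 PARTITION (p1); - -- check the current priority repair queue - SHOW PROC '/cluster_balance/priority_repair'; - ```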
 - -### Scheduler Statistical Status View - -We collect some statistics while the Tablet Checker and Tablet Scheduler are running, which can be viewed with the following command: - -`SHOW PROC '/cluster_balance/sched_stat';` - -``` -+---------------------------------------------------+-------------+ -| Item | Value | -+---------------------------------------------------+-------------+ -| num of tablet check round | 12041 | -| cost of tablet check(ms) | 7162342 | -| num of tablet checked in tablet checker | 18793506362 | -| num of unhealthy tablet checked in tablet checker | 7043900 | -| num of tablet being added to tablet scheduler | 1153 | -| num of tablet schedule round | 49538 | -| cost of tablet schedule(ms) | 49822 | -| num of tablet being scheduled | 4356200 | -| num of tablet being scheduled succeeded | 320 | -| num of tablet being scheduled failed | 4355594 | -| num of tablet being scheduled discard | 286 | -| num of tablet priority upgraded | 0 | -| num of tablet priority downgraded | 1096 | -| num of clone task | 230 | -| num of clone task succeeded | 228 | -| num of clone task failed | 2 | -| num of clone task timeout | 2 | -| num of replica missing error | 4354857 | -| num of replica version missing error | 967 | -| num of replica relocating | 0 | -| num of replica redundant error | 90 | -| num of replica missing in cluster error | 0 | -| num of balance scheduled | 0 | -+---------------------------------------------------+-------------+ -``` - -The meanings of each line are as follows: - -* num of tablet check round: Number of rounds the Tablet Checker has run -* cost of tablet check(ms): Total time spent by the Tablet Checker, in ms -* num of tablet checked in tablet checker: Number of tablets checked by the Tablet Checker -* num of unhealthy tablet checked in tablet checker: Number of unhealthy tablets found by the Tablet Checker -* num of tablet being added to tablet scheduler: Number of tablets submitted to the Tablet Scheduler -* num of tablet schedule round: Number of rounds the Tablet Scheduler has run -* cost of tablet schedule(ms): Total time spent by the Tablet Scheduler, in ms -* num of tablet being scheduled: Total number of tablets scheduled -* num of tablet being scheduled succeeded: Total number of tablets scheduled successfully -* num of tablet being scheduled failed: Total number of tablets that failed to be scheduled -* num of tablet being scheduled discard: Total number of tablets that failed to be scheduled and were discarded -* num of tablet priority upgraded: Number of times a tablet's priority was upgraded -* num of tablet priority downgraded: Number of times a tablet's priority was downgraded -* num of clone task: Number of clone tasks generated -* num of clone task succeeded: Number of clone tasks that succeeded -* num of clone task failed: Number of clone tasks that failed -* num of clone task timeout: Number of clone tasks that timed out -* num of replica missing error: Number of tablets whose checked status is "replica missing" -* num of replica version missing error: Number of tablets whose checked status is "replica version missing" (this statistic includes num of replica relocating and num of replica missing in cluster error) -* num of replica relocating: Number of tablets whose checked status is "replica relocating" -* num of replica redundant error: Number of tablets whose checked status is "replica redundant" -* num of replica missing in cluster error: Number of tablets whose checked status is "not in the corresponding cluster" -* num of balance scheduled: Number of balance scheduling attempts - -> Note: The above values are cumulative. We also print these statistics periodically in the FE logs, where the values in parentheses represent the change in each statistic since the last time it was printed. - -## Relevant configuration instructions - -### Adjustable parameters - -The following adjustable parameters are all configurable parameters in fe.conf.
 - -* use\_new\_tablet\_scheduler - - * Description: Whether to enable the new replica scheduling mode, which is the replica scheduling method described in this document. If it is turned on, `disable_colocate_join` must be `true`, because the new scheduling strategy does not yet support scheduling the data shards of colocation tables. - * Default value: true - * Importance: High - -* tablet\_repair\_delay\_factor\_second - - * Description: Repairs are delayed by different amounts of time for different scheduling priorities, to prevent a large number of unnecessary replica repair tasks during routine restarts and upgrades. This parameter is a reference coefficient: for HIGH priority the delay is the coefficient * 1, for NORMAL priority * 2, and for LOW priority * 3. That is, the lower the priority, the longer the delay. If you want replicas to be repaired as soon as possible, this parameter can be reduced appropriately. - * Default value: 60 seconds - * Importance: High - -* schedule\_slot\_num\_per\_path - - * Description: The default number of slots allocated to each disk for replica repair. This number represents the number of replica repair tasks a disk can run simultaneously. If you want replicas to be repaired faster, you can increase this parameter appropriately; the higher the value, the greater the impact on IO. - * Default value: 2 - * Importance: High - -* balance\_load\_score\_threshold - - * Description: Cluster balance threshold. The default is 0.1, i.e. 10%. When the load score of a BE node is within 10% of the average load score, the node is considered balanced. If you want the cluster load to be more even, you can lower this parameter appropriately. - * Default value: 0.1 - * Importance: - -* storage\_high\_watermark\_usage\_percent and storage\_min\_left\_capacity\_bytes - - * Description: These two parameters represent the upper limit on disk space utilization and the lower limit on remaining disk space, respectively. When the space utilization of a disk exceeds the upper limit, or its remaining space falls below the lower limit, the disk is no longer used as a destination for balance scheduling. - * Default values: 0.85 and 1048576000 (1GB) - * Importance: - -* disable\_balance - - * Description: Controls whether balancing is disabled. While replicas are being balanced, some operations, such as ALTER TABLE, are disallowed, and balancing can last a long time. If you need to perform such operations as soon as possible, you can set this parameter to true to turn off balance scheduling. - * Default value: false - * Importance: - -### Unadjustable parameters - -The following parameters cannot be modified for the time being; they are listed here for reference only. - -* Tablet Checker scheduling interval - - The Tablet Checker runs a check round every 20 seconds. - -* Tablet Scheduler scheduling interval - - The Tablet Scheduler runs a scheduling round every 5 seconds. - -* Number of tablets scheduled per batch by the Tablet Scheduler - - The Tablet Scheduler schedules up to 50 tablets at a time. - -* Maximum number of waiting and running tasks in the Tablet Scheduler - - The maximum number of waiting plus running tasks is 2000. Beyond 2000, the Tablet Checker no longer generates new scheduling tasks for the Tablet Scheduler.
 - -* Maximum number of balance tasks in the Tablet Scheduler - - The maximum number of balance tasks is 500. Beyond 500, no new balance tasks are generated. - -* Number of slots per disk for balance tasks - - The number of slots per disk for balance tasks is 2. This slot is independent of the slots used for replica repair. - -* Update interval of the cluster load statistics - - The Tablet Scheduler recalculates the load score of the cluster every 20 seconds. - -* Minimum and maximum timeout for clone tasks - - The timeout of a clone task ranges from 3 minutes to 2 hours. The specific timeout is calculated from the tablet size using the formula (tablet size) / (5MB/s). If a clone task fails three times, the task is terminated. - -* Dynamic priority adjustment strategy - - The minimum priority adjustment interval is 5 minutes. When scheduling of a tablet fails five times, its priority is lowered; when a tablet has not been scheduled for 30 minutes, its priority is raised. - -## Relevant issues - -* In some cases, the default replica repair and balance strategy may saturate the network (mostly with gigabit NICs and a large number of disks per BE). In that case, some parameters need to be adjusted to reduce the number of simultaneous balance and repair tasks. - -* The current balance strategy for Colocate Table replicas cannot guarantee that replicas of the same Tablet are not placed on BEs of the same host. However, the repair strategy for Colocate Table replicas detects this placement error and corrects it. It may then happen that, after correction, the balance strategy considers the replicas unbalanced and rebalances them, so the Colocate Group never stabilizes because the two states keep alternating. In this situation, we recommend keeping the cluster as homogeneous as possible when using the Colocate attribute, to reduce the probability of replicas being placed on the same host. - -## Best Practices - -### Control and manage the progress of replica repair and balancing of clusters - -In most cases, Doris can automatically perform replica repair and cluster balancing with the default parameter configuration. However, in some cases we need to intervene manually and adjust parameters for special purposes, such as prioritizing the repair of a table or partition, disabling cluster balancing to reduce cluster load, or prioritizing the repair of non-colocation table data. - -This section describes how to control and manage the progress of replica repair and balancing of the cluster by modifying the parameters. - -1. Deleting corrupt replicas - - In some cases, Doris may not be able to automatically detect some corrupt replicas, resulting in frequent query or import errors on those replicas. In this case, the corrupt replicas need to be deleted manually. This can be used, for example, to delete a replica whose excessive version count causes -235 errors, or a replica whose files are corrupted. - - First, find the tablet id of the corresponding replica, say 10001, then use `show tablet 10001;` to obtain the corresponding `show proc` statement and execute it to see the details of each replica of that tablet. - - Assuming the backend id of the replica to be deleted is 20001, execute the following statement to mark the replica as `bad`.
 - - ``` - ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10001", "backend_id" = "20001", "status" = "bad"); - ``` - - At this point, the `show proc` statement shows that the `IsBad` column of the corresponding replica now has the value `true`. - - The replica marked as `bad` no longer participates in imports or queries, and the replica repair logic automatically supplements a new replica. - -2. Prioritize repairing a table or partition - - Use `help admin repair table;` to view the help. This command asks the scheduler to repair the tablets of the specified table or partition first. - -3. Stop balance tasks - - Balance tasks take up some network bandwidth and IO resources. If you wish to stop the generation of new balance tasks, you can do so with the following command. - - ``` - ADMIN SET FRONTEND CONFIG ("disable_balance" = "true"); - ``` - -4. Stop all replica scheduling tasks - - Replica scheduling tasks include balance and repair tasks. They take up some network bandwidth and IO resources. All replica scheduling tasks (except those already running), for both colocation tables and common tables, can be stopped with the following command. - - ``` - ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "true"); - ``` - -5. Stop replica scheduling tasks for all colocation tables - - Replica scheduling of colocation tables runs separately and independently from regular tables. In some cases, users may wish to stop the balancing and repair of colocation tables first, and devote cluster resources to repairing normal tables, with the following command. - - ``` - ADMIN SET FRONTEND CONFIG ("disable_colocate_balance" = "true"); - ``` - -6. Repair replicas using a more conservative strategy - - Doris automatically repairs replicas when it detects missing replicas, BE downtime, etc. However, to reduce errors caused by jitter (e.g., a BE being down briefly), Doris delays triggering these tasks. - - * The `tablet_repair_delay_factor_second` parameter defaults to 60 seconds. Depending on the priority of the repair task, the trigger is delayed by 60, 120, or 180 seconds. This time can be extended with the following command, so that longer exceptions are tolerated and unnecessary repair tasks are avoided. - - ``` - ADMIN SET FRONTEND CONFIG ("tablet_repair_delay_factor_second" = "120"); - ``` - -7. Use a more conservative strategy to trigger redistribution of colocation groups - - Redistribution of colocation groups may be accompanied by a large number of tablet migrations. `colocate_group_relocate_delay_second` controls the redistribution trigger delay. The default is 1800 seconds. If a BE node is likely to be offline for a long time, you can try to increase this parameter to avoid unnecessary redistribution: - - ``` - ADMIN SET FRONTEND CONFIG ("colocate_group_relocate_delay_second" = "3600"); - ``` - -8. Faster replica balancing - - To migrate a replica, Doris's balancing logic first adds a new replica and then deletes the old one. When deleting the old replica, Doris waits for any import tasks already started on that replica to finish, to avoid the balancing task affecting import tasks. However, this slows down the balancing logic. In this case, you can make Doris skip this wait and delete the old replica directly by modifying the following parameter.
 - - ``` - ADMIN SET FRONTEND CONFIG ("enable_force_drop_redundant_replica" = "true"); - ``` - - This operation may cause some import tasks to fail during balancing (requiring a retry), but it will speed up balancing significantly. - -Overall, when the cluster needs to be brought back to a normal state quickly, consider the following approach. - -1. Find the tablet that is causing the high-priority task to report errors and set the problematic replica to bad. -2. Repair some tables with the `admin repair` statement. -3. Stop the replica balancing logic to avoid taking up cluster resources, and then turn it on again after the cluster is restored. -4. Use a more conservative strategy to trigger repair tasks to deal with the avalanche effect caused by frequent BE downtime. -5. Turn off scheduling tasks for colocation tables on demand and focus cluster resources on repairing other high-priority data. diff --git a/docs/en/administrator-guide/outfile.md b/docs/en/administrator-guide/outfile.md deleted file mode 100644 index f07c6d54e0..0000000000 --- a/docs/en/administrator-guide/outfile.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -{ - "title": "Export Query Result", - "language": "en" -} ---- - - - -# Export Query Result - -This document describes how to use the `SELECT INTO OUTFILE` command to export query results. - -## Syntax - -The `SELECT INTO OUTFILE` statement can export the query results to a file. It currently supports exporting to remote storage such as HDFS, S3, BOS and COS (Tencent Cloud) through the Broker process, or directly through the S3 or HDFS protocol. The syntax is as follows: - -``` -query_stmt -INTO OUTFILE "file_path" -[format_as] -[properties] -``` - -* `file_path` - - `file_path` specifies the file path and file name prefix, e.g. `hdfs://path/to/my_file_`. - - The final file name is assembled from `my_file_`, the file sequence number and the format suffix. The file sequence number starts from 0 and is determined by the number of file splits. - - ``` - my_file_abcdefg_0.csv - my_file_abcdefg_1.csv - my_file_abcdefg_2.csv - ``` - -* `[format_as]` - - ``` - FORMAT AS CSV - ``` - - Specifies the export format. Supported formats are csv, parquet, csv_with_names and csv_with_names_and_types. The default is CSV. - -* `[properties]` - - Specifies the relevant properties. Exporting through the Broker process, or through the S3 or HDFS protocol, is currently supported. A combined example is sketched after this section. - - + Broker-related properties need to be prefixed with `broker.`. For details, please refer to the [Broker Document](./broker.html). - + The HDFS protocol can be configured directly with HDFS properties. `hdfs.fs.defaultFS` specifies the namenode address and port and is required. - + The S3 protocol can be configured directly with S3 properties. - - ``` - PROPERTIES - ("broker.prop_key" = "broker.prop_val", ...) - or - ("hdfs.fs.defaultFS" = "xxx", "hdfs.user" = "xxx") - or - ("AWS_ENDPOINT" = "xxx", ...) - ``` - - Other properties - - ``` - PROPERTIES - ("key1" = "val1", "key2" = "val2", ...) - ``` - - The following properties are currently supported: - - * `column_separator`: Column separator, only applicable to the CSV format. The default is `\t`. - * `line_delimiter`: Line delimiter, only applicable to the CSV format. The default is `\n`. - * `max_file_size`: The maximum size of a single file. The default is 1GB, and the range is 5MB to 2GB. Files exceeding this size will be split. - * `schema`: Schema information for PARQUET, only applicable to the PARQUET format. If the exported file format is PARQUET, `schema` must be specified.
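 - - As a combined sketch of the pieces above (the broker name `my_broker` and the HDFS path are hypothetical and must be adapted to your environment), an export through a Broker could look like this: - - ``` - -- export the whole table as CSV files of at most 100MB each, - -- written to HDFS through the broker named my_broker - SELECT * FROM tbl1 - INTO OUTFILE "hdfs://host:port/path/to/result_" - FORMAT AS CSV - PROPERTIES - ( - "broker.name" = "my_broker", - "column_separator" = ",", - "line_delimiter" = "\n", - "max_file_size" = "100MB" - ); - ``` - - The generated files reuse the `result_` prefix and are numbered and suffixed according to the naming rule described above.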
 - -## Concurrent export - -By default, the export of the query result set is non-concurrent, that is, a single point of export. If the user wants the query result set to be exported concurrently, the following conditions need to be met: - -1. Enable concurrent export via the session variable `enable_parallel_outfile`: ```set enable_parallel_outfile = true;``` -2. The export target is S3 or HDFS, not a broker -3. The query itself allows concurrent export; for example, the top level must not contain single-point nodes such as a sort. (An example of a query that cannot export its result set concurrently is given below.) - -If the above three conditions are met, concurrent export of the query result set is triggered. Concurrency = ```be_instance_num * parallel_fragment_exec_instance_num``` - -### How to verify that the result set is exported concurrently - -After enabling concurrent export through the session variable, you can verify whether the current query can be exported concurrently as follows. - -``` -explain select xxx from xxx where xxx into outfile "s3://xxx" format as csv properties ("AWS_ENDPOINT" = "xxx", ...); -``` - -After explaining the query, Doris returns the plan of the query. If ```RESULT FILE SINK``` appears in ```PLAN FRAGMENT 1```, concurrent export has been enabled successfully. -If ```RESULT FILE SINK``` appears in ```PLAN FRAGMENT 0```, the current query cannot be exported concurrently (it does not satisfy all three conditions for concurrent export). - -``` -Planning example for concurrent export: -+-----------------------------------------------------------------------------+ -| Explain String | -+-----------------------------------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS: | | | | -| PARTITION: UNPARTITIONED | -| | -| RESULT SINK | -| | -| 1:EXCHANGE | -| | -| PLAN FRAGMENT 1 | -| OUTPUT EXPRS:`k1` + `k2` | -| PARTITION: HASH_PARTITIONED: `default_cluster:test`.`multi_tablet`.`k1` | -| | -| RESULT FILE SINK | -| FILE PATH: s3://ml-bd-repo/bpit_test/outfile_1951_ | -| STORAGE TYPE: S3 | -| | -| 0:OlapScanNode | -| TABLE: multi_tablet | -+-----------------------------------------------------------------------------+ -``` - -## Usage example - -For details, please refer to the [OUTFILE Document](../sql-reference/sql-statements/Data%20Manipulation/OUTFILE.md). - -## Return result - -The command is synchronous; when it returns, the operation has finished. -One row of results is returned to show the outcome of the export. - -If the export completes normally, the result is as follows: - -``` -mysql> select * from tbl1 limit 10 into outfile "file:///home/work/path/result_"; -+------------+-----------+----------+--------------------------------------------------------------------+ -| FileNumber | TotalRows | FileSize | URL | -+------------+-----------+----------+--------------------------------------------------------------------+ -| 1 | 2 | 8 | file:///192.168.1.10/home/work/path/result_{fragment_instance_id}_ | -+------------+-----------+----------+--------------------------------------------------------------------+ -1 row in set (0.05 sec) -``` - -* FileNumber: The number of files finally generated. -* TotalRows: The number of rows in the result set. -* FileSize: The total size of the exported files, in bytes.
 -* URL: If exported to a local disk, the compute node the files were exported to is displayed here. - -If a concurrent export is performed, multiple rows of data are returned. - -``` -+------------+-----------+----------+--------------------------------------------------------------------+ -| FileNumber | TotalRows | FileSize | URL | -+------------+-----------+----------+--------------------------------------------------------------------+ -| 1 | 3 | 7 | file:///192.168.1.10/home/work/path/result_{fragment_instance_id}_ | -| 1 | 2 | 4 | file:///192.168.1.11/home/work/path/result_{fragment_instance_id}_ | -+------------+-----------+----------+--------------------------------------------------------------------+ -2 rows in set (2.218 sec) -``` - -If the execution fails, an error message is returned, such as: - -``` -mysql> SELECT * FROM tbl INTO OUTFILE ... -ERROR 1064 (HY000): errCode = 2, detailMessage = Open broker writer failed ... -``` - -## Notice - -* The CSV format does not support exporting binary types, such as the BITMAP and HLL types. These types are output as `\N`, i.e. null. -* If concurrent export is not enabled, the query result is exported by a single BE node in a single thread, so the export time is positively correlated with the size of the exported result set. Turning on concurrent export can reduce the export time. -* The export command does not check whether the file and file path exist. Whether the path is created automatically or an existing file is overwritten is entirely determined by the semantics of the remote storage system. -* If an error occurs during the export, exported files may remain on the remote storage system. Doris does not clean up these files; the user needs to clean them up manually. -* The timeout of the export command is the same as the timeout of the query. It can be set with `SET query_timeout = xxx`. -* A query with an empty result still produces an empty file. -* File splitting ensures that a single row is stored intact in one file, so the file size is not strictly equal to `max_file_size`. -* For functions whose output is invisible characters, such as the BITMAP and HLL types, the output is `\N`, i.e. NULL. -* At present, the output type of some geo functions, such as `ST_Point`, is VARCHAR, but the actual output value is encoded binary, so these functions currently output garbled characters. For geo functions, use `ST_AsText` for output. diff --git a/docs/en/administrator-guide/privilege.md b/docs/en/administrator-guide/privilege.md deleted file mode 100644 index f20316b1f7..0000000000 --- a/docs/en/administrator-guide/privilege.md +++ /dev/null @@ -1,224 +0,0 @@ ---- -{ - "title": "Authority Management", - "language": "en" -} ---- - - - -# Authority Management - -Doris's new privilege management system is modeled on MySQL's privilege management mechanism. It provides fine-grained table-level privilege control and role-based access control, and supports a whitelist mechanism. - -## Noun Interpretation - -1. user_identity - - In the permission system, a user is identified by a User Identity, which consists of two parts: username and userhost. The username is composed of upper- and lower-case English letters. The userhost represents the IP from which the user connection comes. A user_identity is presented as username@'userhost', representing the user named username connecting from userhost.
 - - Another expression of user_identity is username@['domain'], where domain is a domain name, which can be resolved into a set of IPs by DNS or BNS (Baidu Name Service). In the end it amounts to a set of username@'userhost' entries, so we use username@'userhost' to represent it. - -2. Privilege - - The objects of privileges are nodes, databases, or tables. Different privileges grant different operation permissions. - -3. Role - - Doris can create roles with custom names. A role can be seen as a set of privileges. A newly created user can be assigned a role, and the role's privileges are then granted automatically. Subsequent changes to the role's privileges are also reflected in the privileges of all users that belong to that role. - -4. user_property - - User attributes are attached directly to a user, not to a user identity. That is, both cmy@'192.%' and cmy@['domain'] have the same set of user attributes, which belong to the user cmy, not to cmy@'192.%' or cmy@['domain']. - - User attributes include, but are not limited to, the maximum number of user connections, import cluster configuration, and so on. - -## Supported operations - -1. Create users: CREATE USER -2. Delete users: DROP USER -3. Grant privileges: GRANT -4. Revoke privileges: REVOKE -5. Create roles: CREATE ROLE -6. Delete roles: DROP ROLE -7. View current user privileges: SHOW GRANTS -8. View all user privileges: SHOW ALL GRANTS -9. View created roles: SHOW ROLES -10. View user attributes: SHOW PROPERTY - -For detailed help with the above commands, use `HELP` + command after connecting to Doris through the MySQL client, for example `HELP CREATE USER`. - -## Permission type - -Doris currently supports the following privileges: - -1. Node_priv - - Node change privilege, including adding, deleting, and decommissioning FE, BE, and BROKER nodes. Currently, this privilege can only be granted to the root user. - -2. Grant_priv - - Privilege change privilege. Allows executing operations such as granting and revoking privileges and adding/deleting/changing users and roles. - -3. Select_priv - - Read-only access to databases and tables. - -4. Load_priv - - Write privilege on databases and tables, including Load, Insert, Delete, and so on. - -5. Alter_priv - - Alter privilege on databases and tables, including renaming databases/tables, adding/deleting/changing columns, and adding/deleting partitions. - -6. Create_priv - - The right to create databases, tables, and views. - -7. Drop_priv - - Drop privilege for databases, tables, and views. - -## Permission hierarchy - -According to the scope to which privileges apply, they are divided into three levels: - -1. GLOBAL LEVEL: Global privileges, i.e. privileges on `*.*` granted by GRANT statements. The granted privileges apply to any table in any database. -2. DATABASE LEVEL: Database-level privileges, i.e. privileges on `db.*` granted by GRANT statements. The granted privileges apply to any table in the specified database. -3. TABLE LEVEL: Table-level privileges, i.e. privileges on `db.tbl` granted by GRANT statements. The granted privileges apply to the specified table in the specified database. - - -## ADMIN/GRANT - -ADMIN\_PRIV and GRANT\_PRIV both carry the special authority to **grant privileges**. The operations related to these two privileges are described here one by one. - -1. CREATE USER - - * Users with ADMIN or GRANT privileges at any level can create new users. - -2. 
DROP USER - - * Only users with the ADMIN privilege can delete users. - -3. CREATE/DROP ROLE - - * Only users with the ADMIN privilege can create roles. - -4. GRANT/REVOKE - - * Users with ADMIN or GLOBAL-level GRANT privileges can grant or revoke the privileges of any user. - * Users with GRANT privileges at the DATABASE level can grant or revoke the privileges of any user on the specified database. - * Users with GRANT privileges at the TABLE level can grant or revoke the privileges of any user on the specified tables in the specified database. - -5. SET PASSWORD - - * Users with ADMIN or GLOBAL-level GRANT privileges can set any user's password. - * Ordinary users can set the password of their corresponding User Identity. The corresponding User Identity can be viewed with the `SELECT CURRENT_USER();` command. - * Users with GRANT privileges at a non-GLOBAL level cannot set the password of existing users; they can only specify the password when creating users. - - -## Some explanations - -1. When Doris initializes, the following users and roles are created automatically: - - 1. operator role: This role has Node\_priv and Admin\_priv, i.e. all privileges in Doris. In a later version we may restrict this role to Node\_priv only, i.e. only node change privileges, to meet certain cloud deployment requirements. - - 2. admin role: This role has Admin\_priv, which is all privileges except node changes. - - 3. root@'%': the root user, allowed to log in from any host, with the operator role. - - 4. admin@'%': the admin user, allowed to log in from any host, with the admin role. - -2. Deleting or changing the privileges of the default roles or users is not supported. - -3. There is one and only one user with the operator role, while multiple users with the admin role can be created. - -4. Notes on possible conflicts - - 1. Conflict between domain name and IP: - - Assume that the following user is created: - - CREATE USER cmy@['domain']; - - and granted: - - GRANT SELECT_PRIV ON \*.\* TO cmy@['domain'] - - The domain is resolved into two IPs: ip1 and ip2 - - Assume that we then grant cmy@'ip1' separately: - - GRANT ALTER_PRIV ON \*.\* TO cmy@'ip1'; - - The privileges of cmy@'ip1' then become SELECT\_PRIV, ALTER\_PRIV. And when we change the privileges of cmy@['domain'] again, cmy@'ip1' will not change accordingly. - - 2. Duplicate IP conflicts: - - Assume that the following users are created: - - CREATE USER cmy@'%' IDENTIFIED BY "12345"; - - CREATE USER cmy@'192.%' IDENTIFIED BY "abcde"; - - In terms of priority, '192.%' takes precedence over '%', so when the user cmy tries to log in to Doris with the password '12345' from 192.168.1.1, the login will be rejected. - -5. Forgotten passwords - - If you forget your password and cannot log in to Doris, you can log in to Doris without a password using the following command on the machine where the Doris FE node is located: - - `mysql-client -h 127.0.0.1 -P query_port -uroot` - - After login, the password can be reset with the SET PASSWORD command. - -6. No user can reset the password of the root user except the root user himself. - -7. ADMIN\_PRIV privileges can only be granted or revoked at the GLOBAL level. - -8. Having GRANT\_PRIV at the GLOBAL level is effectively equivalent to having ADMIN\_PRIV, because GRANT\_PRIV at this level grants the right to grant arbitrary privileges. Please use it carefully. - -9. `current_user()` and `user()` - - Users can view `current_user` and `user` with `SELECT current_user();` and `SELECT user();` respectively.
Here, `current_user` indicates the identity through which the current user passed the authentication system, while `user` is the user's actual current `user_identity`. - - For example, suppose the user `user1@'192.%'` is created, and then a user user1 logs in from 192.168.10.1. In this case, `current_user` is `user1@'192.%'`, and `user` is `user1@'192.168.10.1'`. - - All privileges are granted to a `current_user`, and the real user has all the privileges of the corresponding `current_user`. - -## Best Practices - -Here are some usage scenarios of the Doris privilege system. - -1. Scene 1 - - The users of a Doris cluster are divided into administrators (Admin), development engineers (RD), and users (Client). Administrators have all privileges on the whole cluster and are mainly responsible for cluster setup, node management, and so on. Development engineers are responsible for business modeling, including creating databases and tables and importing and modifying data. Users access databases and tables to read data. - - In this scenario, ADMIN or GRANT privileges can be granted to administrators; RD can be given CREATE, DROP, ALTER, LOAD, and SELECT privileges on any or specified databases and tables; and Client can be given SELECT privileges on any or specified databases and tables. Creating different roles can also simplify authorizing multiple users. - -2. Scene 2 - - There are multiple businesses in a cluster, and each business may use one or more databases. Each business needs to manage its own users. In this scenario, an administrator can create a user with DATABASE-level GRANT privileges for each database. That user can then grant privileges on the specified database, and only on that database, to other users. - -3. Blacklist - - Doris itself does not support blacklists, only whitelists, but we can simulate a blacklist. Suppose a user named `user@'192.%'` is created first, allowing users from `192.*` to log in. To prohibit logins from `192.168.10.1`, create another user `user@'192.168.10.1'` and set a new password. Since `192.168.10.1` has a higher priority than `192.%`, the user can no longer log in from `192.168.10.1` with the old password. - - diff --git a/docs/en/administrator-guide/query_cache.md b/docs/en/administrator-guide/query_cache.md deleted file mode 100644 index bb0cde2e43..0000000000 --- a/docs/en/administrator-guide/query_cache.md +++ /dev/null @@ -1,138 +0,0 @@ ---- -{ - "title": "QUERY CACHE", - "language": "en" -} ---- - - - -# QUERY CACHE - -## 1 Demand - -Although the database storage layer also has its own cache, that cache is keyed on the query content and its granularity is too small; it generally only helps when the data in the table does not change. It therefore cannot relieve the heavy IO pressure that business systems put on the database through inserts, deletes, and queries. Database caching technology was born to cache hot data, improve application response speed, and greatly relieve the pressure on the back-end database. - -- High concurrency scenarios - Doris supports high concurrency well, but a single server cannot sustain an overly high QPS.
 - -- Complex dashboards - It is common for the data behind complex dashboards and large-screen applications to come from many tables, with dozens of queries on a single page. Even though each query takes only a few milliseconds, the queries together take several seconds. - -- Trend analysis - In some scenarios, queries cover a given date range and metrics are displayed by date, for example querying the trend of the number of users over the last 7 days. Such queries touch a large amount of data and a wide range of fields, and often take tens of seconds. - -- Repeated user queries - If the product has no anti-refresh mechanism, users may accidentally refresh the page repeatedly, resulting in a large number of repeated SQL queries being submitted. - -In the above four scenarios, one solution is at the application layer: put the query results in Redis and update the cache periodically or let the user refresh the cache manually. However, this solution has the following problems: - -- Data inconsistency: the cache cannot detect data updates, so users often see stale data. - -- Low hit rate: the whole query result is usually cached, so if data is written in real time the cache is frequently invalidated, resulting in a low hit rate and heavy load on the system. - -- Extra cost: introducing an external cache component adds system complexity and additional cost. - -## 2 Solutions - -At present, two modules are designed: result cache and partition cache. - -## 3 Explanation of terms - -1. result cache - -Caches the result set of a user's SQL query directly. - -2. partition cache - -Caches query results per partition, at partition granularity. - -## 4 Design - -### 1 `result cache` - -result_cache is divided into two types: the first is result_cache_ttl and the second is result_cache_version. - -#### `result_cache_ttl` - -The result_cache_ttl variable is set in the user session, so the user can decide whether to turn it on. The TTL determines whether a user's SQL uses the cache. Data correctness is not guaranteed when the underlying data changes. - -The cache is stored and retrieved according to the connected user and the query SQL. Once the cache expiration time is exceeded, the cache is not hit and the cached entry is cleaned up. - -#### `result_cache_version` - -result_cache_version stores and fetches the cache according to the SQL signature, the partition IDs of the queried tables, and the latest partition versions. The combination of the three determines a cached dataset. If any one of them changes, e.g. the SQL changes, the queried fields or conditions differ, or the version changes after a data update, the cache is not hit. - -If multiple tables are joined, the latest partition IDs and version numbers are used. If one of the tables is updated, its partition ID or version number will differ, and the cache is not hit. - -### 2 `partition_cache` - -1. SQL can be split in parallel: Q = Q1 ∪ Q2 ... ∪ Qn and R = R1 ∪ R2 ... ∪ Rn, where Q is the query and R is the result set -2. 
Partitions are split into read-only partitions and updatable partitions; results of read-only partitions are cached, results of updatable partitions are not - -## 5 usage - -|cache type|usage| -|--|--| -|result_cache_ttl|Mainly solves scenarios with high QPS and repeated user queries| -|result_cache_version|Mainly solves scenarios where the whole table has not changed for a long time| -|partition_cache|Mainly solves scenarios where historical partitions do not change| - -## 6 parameter - -### fe - -1. `cache_per_query_max_row_count` -- The maximum number of rows cached per query -- The default value is 3000 - -2. `cache_per_query_max_size_In_bytes` -- The maximum cache size per query, in bytes -- The default value is 1MB - -3. `result_cache_ttl_In_milliseconds` -- Cache duration of the result cache -- The default value is 3s - -### be - -1. `cache_max_partition_count` -- The maximum number of partitions cached on a BE. `cache_max_partition_count` refers to the maximum number of partitions corresponding to each SQL. If partitions are by date, more than 2 years of data can be cached. If you want to keep the cache longer, set this parameter larger and also adjust the `cache_result_max_row_count` parameter. -- Default value: 1024 - -2. `cache_max_size_in_mb` `cache_elasticity_size_in_mb` -- The cache memory setting on the backend has two parameters: `cache_max_size_in_mb` (default 256) and `cache_elasticity_size_in_mb` (default 128). When memory exceeds `cache_max_size_in_mb` + `cache_elasticity_size_in_mb`, the cache is cleaned up until memory falls back to `cache_max_size_in_mb`. These two parameters can be set according to the number of BE nodes, the memory size of each node, and the cache hit rate. - -## 7 how to use - -- use enable_result_cache_ttl -``` -set `global` enable_result_cache_ttl = true -``` - -- use enable_result_cache_version -``` -set `global` enable_result_cache_version = true -``` - -- use enable_partition_cache -``` -set `global` enable_partition_cache = true -``` \ No newline at end of file diff --git a/docs/en/administrator-guide/resource-management.md b/docs/en/administrator-guide/resource-management.md deleted file mode 100644 index 7d0ed32079..0000000000 --- a/docs/en/administrator-guide/resource-management.md +++ /dev/null @@ -1,169 +0,0 @@ ---- -{ - "title": "Resource management", - "language": "en" -} ---- - - - -# Resource Management - -To save compute and storage resources in the Doris cluster, Doris needs to reference some external resources to do related work, such as Spark/GPU for queries, HDFS/S3 for external storage, Spark/MapReduce for ETL, and ODBC drivers for connecting to external storage. Therefore, Doris needs a resource management mechanism to manage these external resources. - -## Fundamental Concept - -A resource contains basic information such as name and type. The name is globally unique. Different types of resources contain different attributes; please refer to the introduction of each resource for details. - -Resources can only be created and deleted by users with the `admin` privilege. A resource belongs to the entire Doris cluster. Users with the `admin` privilege can grant the right to use a resource to other users. Please refer to `HELP GRANT` or the Doris documentation. - - -## Operation Of Resource - -There are three main commands for resource management: `create resource`, `drop resource` and `show resources`. They create, delete, and inspect resources, respectively. The specific syntax of these three commands can be viewed by executing `help CMD` after the MySQL client connects to Doris. - -1. 
CREATE RESOURCE - - - ```sql - CREATE [EXTERNAL] RESOURCE "resource_name" - PROPERTIES ("key"="value", ...); - ``` - - In the command to create a resource, the user must provide the following information: - - * `resource_name`: name of the resource - * `PROPERTIES`: related parameters, as follows: - * `type`: resource type, required. Currently, only spark and odbc_catalog are supported. - * For other parameters, see the introduction of each resource - - - -2. DROP RESOURCE - - This command deletes an existing resource. For details, please refer to: `HELP DROP RESOURCE` - -3. SHOW RESOURCES - - This command shows the resources that the user has permission to use. Please refer to: `HELP SHOW RESOURCES` - - - -## Resources Supported - -Currently, Doris supports: - -* Spark resources: for ETL work -* ODBC resources: to query and import data from external tables - -The following shows how the two resources are used. - -### Spark - -#### Parameter - -##### Spark Parameters: - -`spark.master`: required; currently yarn and spark://host:port are supported. - -`spark.submit.deployMode`: The deployment mode of Spark, required. cluster and client are supported. - -`spark.hadoop.yarn.resourcemanager.address`: required when master is yarn. - -`spark.hadoop.fs.defaultFS`: required when master is yarn. - -Other parameters are optional; refer to: http://spark.apache.org/docs/latest/configuration.html. - -##### If Spark is used for ETL, the following parameters also need to be specified: - -`working_dir`: Directory used by ETL. Required when Spark is used as an ETL resource. For example: hdfs://host:port/tmp/doris. - -`broker`: The name of the broker. Required when Spark is used as an ETL resource. You need to use the `ALTER SYSTEM ADD BROKER` command to complete the configuration in advance. - - * `broker.property_key`: Authentication information the broker needs when reading the intermediate files generated by ETL. - - - -#### Example - -Create a Spark resource named `spark0` in yarn cluster mode. - - -```sql -CREATE EXTERNAL RESOURCE "spark0" -PROPERTIES -( - "type" = "spark", - "spark.master" = "yarn", - "spark.submit.deployMode" = "cluster", - "spark.jars" = "xxx.jar,yyy.jar", - "spark.files" = "/tmp/aaa,/tmp/bbb", - "spark.executor.memory" = "1g", - "spark.yarn.queue" = "queue0", - "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999", - "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000", - "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris", - "broker" = "broker0", - "broker.username" = "user0", - "broker.password" = "password0" -); -``` - -### ODBC - -#### Parameter - -##### ODBC Parameters: - -`type`: Required, must be `odbc_catalog`. Used as the resource type identifier. - -`user`: The user name of the external table, required. - -`password`: The user password of the external table, required. - -`host`: The IP address of the external table, required. - -`port`: The port of the external table, required. - -`odbc_type`: Indicates the type of the external table. Currently, Doris supports `MySQL` and `Oracle`, and may support more databases in the future. Required for ODBC external tables that reference this resource; optional for old-style MySQL external tables. - -`driver`: Indicates the driver dynamic library used by the ODBC external table. -Required for ODBC external tables that reference this resource; optional for old-style MySQL external tables. (A MySQL example is sketched below, in addition to the Oracle example.)
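 - - As a sketch of how these parameters combine for MySQL (the host, port, credentials, and driver name are placeholders; the driver name typically has to match a driver configured on the BE side): - - ```sql - -- a MySQL ODBC resource; the properties mirror the parameter list above - CREATE EXTERNAL RESOURCE `mysql_odbc` - PROPERTIES ( - "type" = "odbc_catalog", - "host" = "192.168.0.2", - "port" = "3306", - "user" = "test", - "password" = "test", - "odbc_type" = "mysql", - "driver" = "MySQL ODBC 8.0 Unicode Driver" - ); - ```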
 - -For the usage of ODBC resources, please refer to [ODBC of Doris](../extending-doris/odbc-of-doris.html) - - -#### Example - -Create an Oracle ODBC resource named `oracle_odbc`. - -```sql -CREATE EXTERNAL RESOURCE `oracle_odbc` -PROPERTIES ( -"type" = "odbc_catalog", -"host" = "192.168.0.1", -"port" = "8086", -"user" = "test", -"password" = "test", -"database" = "test", -"odbc_type" = "oracle", -"driver" = "Oracle 19 ODBC driver" -); -``` diff --git a/docs/en/administrator-guide/running-profile.md b/docs/en/administrator-guide/running-profile.md deleted file mode 100644 index 17ce052f09..0000000000 --- a/docs/en/administrator-guide/running-profile.md +++ /dev/null @@ -1,276 +0,0 @@ ---- -{ - "title": "Statistics of query execution", - "language": "en" -} ---- - - - -# Statistics of query execution - -This document introduces the **Running Profile**, which records the runtime status of Doris during query execution. Using this statistical information, we can better understand fragment execution and become experts at Doris **debugging and tuning**. - -## Noun Interpretation - -* **FE**: Frontend, the frontend node of Doris. Responsible for metadata management and request access. - -* **BE**: Backend, the backend node of Doris. Responsible for query execution and data storage. - -* **Fragment**: The FE converts the execution of a specific SQL statement into corresponding fragments and distributes them to the BEs for execution. Each BE executes its fragments and sends the gathered RunningProfile results back to the FE. - -## Basic concepts - -The FE splits the query plan into fragments and distributes them to the BEs for execution. Each BE records **running state** statistics while executing its fragments and prints the statistics of fragment execution into its log. The FE can also collect the statistics recorded by each fragment and display the results on the FE web page. -## Specific operation - -Turn on the report switch on the FE through the MySQL command: - -``` -mysql> set enable_profile=true; -``` - -After executing the corresponding SQL statement (the variable is called `is_report_success` in old versions), we can see the report information of that SQL statement on the FE web page, as in the picture below. -![image.png](/images/running_profile.png) - -The latest **100 statements** executed are listed here, and we can view the detailed statistics of the RunningProfile. -``` -Query: - Summary: - Query ID: 9664061c57e84404-85ae111b8ba7e83a - Start Time: 2020-05-02 10:34:57 - End Time: 2020-05-02 10:35:08 - Total: 10s323ms - Query Type: Query - Query State: EOF - Doris Version: trunk - User: root - Default Db: default_cluster:test - Sql Statement: select max(Bid_Price) from quotes group by Symbol -``` -This is a detailed list of ```query ID, execution time, execution statement``` and other summary information. The next part prints the details of each fragment collected from the BEs.
``` - Fragment 0: - Instance 9664061c57e84404-85ae111b8ba7e83d (host=TNetworkAddress(hostname:192.168.0.1, port:9060)):(Active: 10s270ms, % non-child: 0.14%) - - MemoryLimit: 2.00 GB - - BytesReceived: 168.08 KB - - PeakUsedReservation: 0.00 - - SendersBlockedTimer: 0ns - - DeserializeRowBatchTimer: 501.975us - - PeakMemoryUsage: 577.04 KB - - RowsProduced: 8.322K (8322) - EXCHANGE_NODE (id=4):(Active: 10s256ms, % non-child: 99.35%) - - ConvertRowBatchTime: 180.171us - - PeakMemoryUsage: 0.00 - - RowsReturned: 8.322K (8322) - - MemoryUsed: 0.00 - - RowsReturnedRate: 811 - ``` -The fragment ID is listed here; ```hostname``` shows the BE node executing the fragment; ```Active: 10s270ms``` shows the total execution time of the node; ```non-child: 0.14%``` is the execution time of the node itself (not including the execution time of child nodes) as a percentage of the total time. - -`PeakMemoryUsage` indicates the peak memory usage of `EXCHANGE_NODE`; `RowsReturned` indicates the number of rows returned by `EXCHANGE_NODE`; `RowsReturnedRate` = `RowsReturned` / `ActiveTime`; these three statistics have the same meaning in other `NODE`s. - -Subsequently, the statistics of the child nodes are printed in turn. **Here you can distinguish the parent-child relationship by indentation.** - -## Profile statistic analysis - -Many statistics are collected at the BE. Their meanings in the profile are listed below: - -#### `Fragment` - - AverageThreadTokens: Number of threads used to execute the fragment, excluding thread pool usage - - PeakReservation: Peak memory used by the buffer pool - - MemoryLimit: Memory limit of the query - - PeakMemoryUsage: Peak memory usage of the instance - - RowsProduced: Number of rows produced - -#### `BlockMgr` - - BlocksCreated: Number of blocks created by BlockMgr - - BlocksRecycled: Number of blocks recycled by BlockMgr - - BytesWritten: Bytes written to disk when spilling - - MaxBlockSize: Maximum size of a single block - - TotalReadBlockTime: Total time spent reading blocks from disk - -#### `DataStreamSender` - - BytesSent: Total bytes of data sent - - IgnoreRows: Rows filtered out - - LocalBytesSent: Bytes the local node sent to itself during Exchange - - OverallThroughput: Total throughput = BytesSent / Time - - SerializeBatchTime: Time spent serializing data when sending - - UncompressedRowBatchSize: Size of the row batch before compression when sending data - -#### `ODBC_TABLE_SINK` - - NumSentRows: Total number of rows written to the ODBC table - - TupleConvertTime: Time spent serializing the sent data into INSERT statements - - ResultSendTime: Time spent writing through the ODBC driver - -#### `EXCHANGE_NODE` - - BytesReceived: Bytes received over the network - - DataArrivalWaitTime: Total time spent waiting for senders to push data - - MergeGetNext: When the lower-level node contains a sort, the exchange node performs a unified merge sort and outputs an ordered result. This indicator records the total time of merge sorting, including the time of MergeGetNextBatch. - - MergeGetNextBatch: Time spent by the merge node fetching data. For single-level merge sort the data source is the network queue; for multi-level merge sort it is the child mergers. - - ChildMergeGetNext: When there are too many senders in the lower layer, a single-threaded merge becomes a performance bottleneck, so Doris starts multiple child merge threads to merge sort in parallel.
The sorting time of child merge is recorded, which is the cumulative value of multiple threads. - - ChildMergeGetNextBatch: It takes time for child merge to get data,If the time consumption is too large, the bottleneck may be the lower level data sending node. - - FirstBatchArrivalWaitTime: The time waiting for the first batch come from sender - - DeserializeRowBatchTimer: Time consuming to receive data deserialization - - SendersBlockedTotalTimer(*): When the DataStreamRecv's queue buffer is full, wait time of sender - - ConvertRowBatchTime: Time taken to transfer received data to RowBatch - - RowsReturned: Number of receiving rows - - RowsReturnedRate: Rate of rows received - -#### `SORT_NODE` - - InMemorySortTime: In memory sort time - - InitialRunsCreated: Number of initialize sort run - - MergeGetNext: Time cost of MergeSort from multiple sort_run to get the next batch (only show spilled disk) - - MergeGetNextBatch: Time cost MergeSort one sort_run to get the next batch (only show spilled disk) - - SortDataSize: Total sorted data - - TotalMergesPerformed: Number of external sort merges - -#### `AGGREGATION_NODE` - - PartitionsCreated: Number of partition split by aggregate - - GetResultsTime: Time to get aggregate results from each partition - - HTResizeTime: Time spent in resizing hashtable - - HTResize: Number of times hashtable resizes - - HashBuckets: Number of buckets in hashtable - - HashBucketsWithDuplicate: Number of buckets with duplicatenode in hashtable - - HashCollisions: Number of hash conflicts generated - - HashDuplicateNodes: Number of duplicate nodes with the same buckets in hashtable - - HashFailedProbe: Number of failed probe operations - - HashFilledBuckets: Number of buckets filled data - - HashProbe: Number of hashtable probe - - HashTravelLength: The number of steps moved when hashtable queries - -#### `HASH_JOIN_NODE` - - ExecOption: The way to construct a HashTable for the right child (synchronous or asynchronous), the right child in Join may be a table or a subquery, the same is true for the left child - - BuildBuckets: The number of Buckets in HashTable - - BuildRows: the number of rows of HashTable - - BuildTime: Time-consuming to construct HashTable - - LoadFactor: Load factor of HashTable (ie the number of non-empty buckets) - - ProbeRows: Traverse the number of rows of the left child for Hash Probe - - ProbeTime: Time consuming to traverse the left child for Hash Probe, excluding the time consuming to call GetNext on the left child RowBatch - - PushDownComputeTime: The calculation time of the predicate pushdown condition - - PushDownTime: The total time consumed by the predicate push down. 
When Join, the right child who meets the requirements is converted to the left child's in query - -#### `CROSS_JOIN_NODE` - - ExecOption: The way to construct RowBatchList for the right child (synchronous or asynchronous) - - BuildRows: The number of rows of RowBatchList (ie the number of rows of the right child) - - BuildTime: Time-consuming to construct RowBatchList - - LeftChildRows: the number of rows of the left child - - LeftChildTime: The time it takes to traverse the left child and find the Cartesian product with the right child, not including the time it takes to call GetNext on the left child RowBatch - -#### `UNION_NODE` - - MaterializeExprsEvaluateTime: When the field types at both ends of the Union are inconsistent, the time spent to evaluates type conversion exprs and materializes the results - -#### `ANALYTIC_EVAL_NODE` - - EvaluationTime: Analysis function (window function) calculation total time - - GetNewBlockTime: It takes time to apply for a new block during initialization. Block saves the cache line window or the entire partition for analysis function calculation - - PinTime: the time it takes to apply for a new block later or reread the block written to the disk back to the memory - - UnpinTime: the time it takes to flush the data of the block to the disk when the memory pressure of the block that is not in use or the current operator is high - -#### `OLAP_SCAN_NODE` - -The `OLAP_SCAN_NODE` is responsible for specific data scanning tasks. One `OLAP_SCAN_NODE` will generate one or more `OlapScanner`. Each Scanner thread is responsible for scanning part of the data. - -Some or all of the predicate conditions in the query will be pushed to `OLAP_SCAN_NODE`. Some of these predicate conditions will continue to be pushed down to the storage engine in order to use the storage engine's index for data filtering. The other part will be kept in `OLAP_SCAN_NODE` to filter the data returned from the storage engine. - -The profile of the `OLAP_SCAN_NODE` node is usually used to analyze the efficiency of data scanning. It is divided into three layers: `OLAP_SCAN_NODE`, `OlapScanner`, and `SegmentIterator` according to the calling relationship. - -The profile of a typical `OLAP_SCAN_NODE` is as follows. Some indicators will have different meanings depending on the storage format (V1 or V2). - -``` -OLAP_SCAN_NODE (id=0):(Active: 1.2ms,% non-child: 0.00%) - - BytesRead: 265.00 B # The amount of data read from the data file. Assuming that 10 32-bit integers are read, the amount of data is 10 * 4B = 40 Bytes. This data only represents the fully expanded size of the data in memory, and does not represent the actual IO size. - - NumDiskAccess: 1 # The number of disks involved in this ScanNode node. - - NumScanners: 20 # The number of Scanners generated by this ScanNode. - - PeakMemoryUsage: 0.00 # Peak memory usage during query, not used yet - - RowsRead: 7 # The number of rows returned from the storage engine to the Scanner, excluding the number of rows filtered by the Scanner. - - RowsReturned: 7 # The number of rows returned from ScanNode to the upper node. - - RowsReturnedRate: 6.979K /sec # RowsReturned/ActiveTime - - TabletCount: 20 # The number of Tablets involved in this ScanNode. - - TotalReadThroughput: 74.70 KB/sec # BytesRead divided by the total time spent in this node (from Open to Close). 
For IO bounded queries, this should be very close to the total throughput of all the disks - - ScannerBatchWaitTime: 426.886us # To count the time the transfer thread waits for the scaner thread to return rowbatch. - - ScannerWorkerWaitTime: 17.745us # To count the time that the scanner thread waits for the available worker threads in the thread pool. - OlapScanner: - - BlockConvertTime: 8.941us # The time it takes to convert a vectorized Block into a RowBlock with a row structure. The vectorized Block is VectorizedRowBatch in V1 and RowBlockV2 in V2. - - BlockFetchTime: 468.974us # Rowset Reader gets the time of the Block. - - ReaderInitTime: 5.475ms # The time when OlapScanner initializes Reader. V1 includes the time to form MergeHeap. V2 includes the time to generate various Iterators and read the first group of blocks. - - RowsDelFiltered: 0 # Including the number of rows filtered out according to the Delete information in the Tablet, and the number of rows filtered for marked deleted rows under the unique key model. - - RowsPushedCondFiltered: 0 # Filter conditions based on the predicates passed down, such as the conditions passed from BuildTable to ProbeTable in Join calculation. This value is not accurate, because if the filtering effect is poor, it will no longer be filtered. - - ScanTime: 39.24us # The time returned from ScanNode to the upper node. - - ShowHintsTime_V1: 0ns # V2 has no meaning. Read part of the data in V1 to perform ScanRange segmentation. - SegmentIterator: - - BitmapIndexFilterTimer: 779ns # Use bitmap index to filter data time-consuming. - - BlockLoadTime: 415.925us # SegmentReader(V1) or SegmentIterator(V2) gets the time of the block. - - BlockSeekCount: 12 # The number of block seeks when reading Segment. - - BlockSeekTime: 222.556us # It takes time to block seek when reading Segment. - - BlocksLoad: 6 # read the number of blocks - - CachedPagesNum: 30 # In V2 only, when PageCache is enabled, the number of Pages that hit the Cache. - - CompressedBytesRead: 0.00 # In V1, the size of the data read from the file before decompression. In V2, the pre-compressed size of the read page that did not hit the PageCache. - - DecompressorTimer: 0ns # Data decompression takes time. - - IOTimer: 0ns # IO time for actually reading data from the operating system. - - IndexLoadTime_V1: 0ns # Only in V1, it takes time to read Index Stream. - - NumSegmentFiltered: 0 # When generating Segment Iterator, the number of Segments that are completely filtered out through column statistics and query conditions. - - NumSegmentTotal: 6 # Query the number of all segments involved. - - RawRowsRead: 7 # The number of raw rows read in the storage engine. See below for details. - - RowsBitmapIndexFiltered: 0 # Only in V2, the number of rows filtered by the Bitmap index. - - RowsBloomFilterFiltered: 0 # Only in V2, the number of rows filtered by BloomFilter index. - - RowsKeyRangeFiltered: 0 # In V2 only, the number of rows filtered out by SortkeyIndex index. - - RowsStatsFiltered: 0 # In V2, the number of rows filtered by the ZoneMap index, including the deletion condition. V1 also contains the number of rows filtered by BloomFilter. - - RowsConditionsFiltered: 0 # Only in V2, the number of rows filtered by various column indexes. - - RowsVectorPredFiltered: 0 # The number of rows filtered by the vectorized condition filtering operation. - - TotalPagesNum: 30 # Only in V2, the total number of pages read. 
- - UncompressedBytesRead: 0.00 # V1 is the decompressed size of the read data file (if the file does not need to be decompressed, the file size is directly counted). In V2, only the decompressed size of the Page that missed PageCache is counted (if the Page does not need to be decompressed, the Page size is directly counted) - - VectorPredEvalTime: 0ns # Time-consuming of vectorized condition filtering operation. - - ShortPredEvalTime: 0ns # Time-consuming of short-circuiting predicate condition filtering operations. - - PredColumnReadTime: 0ns # Time-consuming of predicate column read. - - LazyReadTime: 0ns # Time-consuming of non-predicate column read. - - OutputColumnTime: 0ns # Time-consuming of materialize columns. -``` - -The predicate push down and index usage can be inferred from the related indicators of the number of data rows in the profile. The following only describes the profile in the reading process of segment V2 format data. In segment V1 format, the meaning of these indicators is slightly different. - - - When reading a segment V2, if the query has key_ranges (the query range composed of prefix keys), first filter the data through the SortkeyIndex index, and the number of filtered rows is recorded in `RowsKeyRangeFiltered`. - - After that, use the Bitmap index to perform precise filtering on the columns containing the bitmap index in the query condition, and the number of filtered rows is recorded in `RowsBitmapIndexFiltered`. - - After that, according to the equivalent (eq, in, is) condition in the query condition, use the BloomFilter index to filter the data and record it in `RowsBloomFilterFiltered`. The value of `RowsBloomFilterFiltered` is the difference between the total number of rows of the Segment (not the number of rows filtered by the Bitmap index) and the number of remaining rows after BloomFilter, so the data filtered by BloomFilter may overlap with the data filtered by Bitmap. - - After that, use the ZoneMap index to filter the data according to the query conditions and delete conditions and record it in `RowsStatsFiltered`. - - `RowsConditionsFiltered` is the number of rows filtered by various indexes, including the values ​​of `RowsBloomFilterFiltered` and `RowsStatsFiltered`. - - So far, the Init phase is completed, and the number of rows filtered by the condition to be deleted in the Next phase is recorded in `RowsDelFiltered`. Therefore, the number of rows actually filtered by the delete condition are recorded in `RowsStatsFiltered` and `RowsDelFiltered` respectively. - - `RawRowsRead` is the final number of rows to be read after the above filtering. - - `RowsRead` is the number of rows finally returned to Scanner. `RowsRead` is usually smaller than `RawRowsRead`, because returning from the storage engine to the Scanner may go through a data aggregation. If the difference between `RawRowsRead` and `RowsRead` is large, it means that a large number of rows are aggregated, and aggregation may be time-consuming. - - `RowsReturned` is the number of rows finally returned by ScanNode to the upper node. `RowsReturned` is usually smaller than `RowsRead`. Because there will be some predicate conditions on the Scanner that are not pushed down to the storage engine, filtering will be performed once. If the difference between `RowsRead` and `RowsReturned` is large, it means that many rows are filtered in the Scanner. This shows that many highly selective predicate conditions are not pushed to the storage engine. 
The filtering efficiency in Scanner is worse than that in storage engine. - -Through the above indicators, you can roughly analyze the number of rows processed by the storage engine and the size of the final filtered result row. Through the `Rows***Filtered` group of indicators, it is also possible to analyze whether the query conditions are pushed down to the storage engine, and the filtering effects of different indexes. In addition, a simple analysis can be made through the following aspects. - - - Many indicators under `OlapScanner`, such as `IOTimer`, `BlockFetchTime`, etc., are the accumulation of all Scanner thread indicators, so the value may be relatively large. And because the Scanner thread reads data asynchronously, these cumulative indicators can only reflect the cumulative working time of the Scanner, and do not directly represent the time consumption of the ScanNode. The time-consuming ratio of ScanNode in the entire query plan is the value recorded in the `Active` field. Sometimes it appears that `IOTimer` has tens of seconds, but `Active` is actually only a few seconds. This situation is usually due to: - - `IOTimer` is the accumulated time of multiple Scanners, and there are more Scanners. - - The upper node is time-consuming. For example, the upper node takes 100 seconds, while the lower ScanNode only takes 10 seconds. The field reflected in `Active` may be only a few milliseconds. Because while the upper layer is processing data, ScanNode has performed data scanning asynchronously and prepared the data. When the upper node obtains data from ScanNode, it can obtain the prepared data, so the Active time is very short. - - `NumScanners` represents the number of Tasks submitted by the Scanner to the thread pool. It is scheduled by the thread pool in `RuntimeState`. The two parameters `doris_scanner_thread_pool_thread_num` and `doris_scanner_thread_pool_queue_size` control the size of the thread pool and the queue length respectively. Too many or too few threads will affect query efficiency. At the same time, some summary indicators can be divided by the number of threads to roughly estimate the time consumption of each thread. - - `TabletCount` indicates the number of tablets to be scanned. Too many may mean a lot of random read and data merge operations. - - `UncompressedBytesRead` indirectly reflects the amount of data read. If the value is large, it means that there may be a lot of IO operations. - - `CachedPagesNum` and `TotalPagesNum` can check the hitting status of PageCache. The higher the hit rate, the less time-consuming IO and decompression operations. - -#### `Buffer pool` - - AllocTime: Memory allocation time - - CumulativeAllocationBytes: Cumulative amount of memory allocated - - CumulativeAllocations: Cumulative number of memory allocations - - PeakReservation: Peak of reservation - - PeakUnpinnedBytes: Amount of memory data of unpin - - PeakUsedReservation: Peak usage of reservation - - ReservationLimit: Limit of reservation of bufferpool - diff --git a/docs/en/administrator-guide/runtime-filter.md b/docs/en/administrator-guide/runtime-filter.md deleted file mode 100644 index 9089e0d2e2..0000000000 --- a/docs/en/administrator-guide/runtime-filter.md +++ /dev/null @@ -1,284 +0,0 @@ ---- -{ - "title": "Runtime Filter", - "language": "en" -} ---- - - - -# Runtime Filter - -Runtime Filter is a new feature officially added in Doris 0.15. 
-It is designed to dynamically generate filter conditions for certain Join queries at runtime to reduce the amount of scanned data, avoid unnecessary I/O and network transmission, and speed up the query.
-
-For its design, implementation and effects, please refer to [ISSUE 6116](https://github.com/apache/incubator-doris/issues/6116).
-
-## Noun Interpretation
-
-* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access.
-* BE: Backend, the back-end node of Doris. Responsible for query execution and data storage.
-* Left table: the table on the left in a Join query, on which the Probe operation is performed. The order can be adjusted by Join Reorder.
-* Right table: the table on the right in a Join query, on which the Build operation is performed. The order can be adjusted by Join Reorder.
-* Fragment: FE converts the execution of specific SQL statements into corresponding fragments and sends them to BE for execution. The corresponding fragments are executed on BE, and the results are aggregated and returned to FE.
-* Join on clause: `Aa=Bb` in `A join B on Aa=Bb`. During query planning, join conjuncts are generated from it, including the exprs used by the join Build and Probe; the Build expr is called the src expr of the Runtime Filter, and the Probe expr is called the target expr of the Runtime Filter.
-
-## Principle
-
-Runtime Filter is generated during query planning, constructed in HashJoinNode, and applied in ScanNode.
-
-For example, suppose there is a Join query between table T1 and table T2, and its Join mode is HashJoin. T1 is a fact table with 100,000 rows of data, and T2 is a dimension table with 2,000 rows of data. The actual join execution in Doris looks like this:
-```
-|          >      HashJoinNode     <
-|         |                         |
-|         | 100000                  | 2000
-|         |                         |
-|   OlapScanNode              OlapScanNode
-|         ^                         ^
-|         | 100000                  | 2000
-|        T1                        T2
-|
-```
-Obviously, scanning T2 is much faster than scanning T1. If we deliberately wait for a while before scanning T1, then after T2 has sent its scanned records to HashJoinNode, HashJoinNode can calculate a filter condition based on the data of T2, such as the maximum and minimum values of the T2 data, or a Bloom Filter, and then send this filter condition to the ScanNode waiting to scan T1. The latter applies the filter condition and delivers the filtered data to HashJoinNode, thereby reducing the number of hash table probes and the network overhead. This filter condition is the Runtime Filter, and the effect is as follows:
-```
-|          >      HashJoinNode     <
-|         |                         |
-|         | 6000                    | 2000
-|         |                         |
-|   OlapScanNode              OlapScanNode
-|         ^                         ^
-|         | 100000                  | 2000
-|        T1                        T2
-|
-```
-If the filter condition (Runtime Filter) can be pushed down to the storage engine, in some cases the index can be used to directly reduce the amount of scanned data, thereby greatly reducing the scanning time. The effect is as follows:
-```
-|          >      HashJoinNode     <
-|         |                         |
-|         | 6000                    | 2000
-|         |                         |
-|   OlapScanNode              OlapScanNode
-|         ^                         ^
-|         | 6000                    | 2000
-|        T1                        T2
-|
-```
-It can be seen that, unlike predicate push-down and partition pruning, the Runtime Filter is a filter condition dynamically generated at runtime: when the query runs, the join on clause is parsed to determine the filter expression, and the expression is broadcast to the ScanNode that is reading the left table, thereby reducing the amount of scanned data, reducing the number of hash table probes, and avoiding unnecessary I/O and network transmission.
-
-Runtime Filter is mainly used to optimize joins between a large table and a small table.
If the amount of data in the left table is too small, or the amount of data in the right table is too large, the Runtime Filter may not achieve the expected effect. - -## Usage - -### Runtime Filter query options - -For query options related to Runtime Filter, please refer to the following sections: - -- The first query option is to adjust the type of Runtime Filter used. In most cases, you only need to adjust this option, and keep the other options as default. - - - `runtime_filter_type`: Including Bloom Filter, MinMax Filter, IN predicate and IN Or Bloom Filter. By default, only IN Or Bloom Filter will be used. In some cases, the performance will be higher when both Bloom Filter, MinMax Filter and IN predicate are used at the same time. - -- Other query options usually only need to be further adjusted in certain specific scenarios to achieve the best results. Usually only after performance testing, optimize for resource-intensive, long enough running time and high enough frequency queries. - - - `runtime_filter_mode`: Used to adjust the push-down strategy of Runtime Filter, including three strategies of OFF, LOCAL, and GLOBAL. The default setting is the GLOBAL strategy - - - `runtime_filter_wait_time_ms`: the time that ScanNode in the left table waits for each Runtime Filter, the default is 1000ms - - - `runtime_filters_max_num`: The maximum number of Bloom Filters in the Runtime Filter that can be applied to each query, the default is 10 - - - `runtime_bloom_filter_min_size`: the minimum length of Bloom Filter in Runtime Filter, default 1048576 (1M) - - - `runtime_bloom_filter_max_size`: the maximum length of Bloom Filter in Runtime Filter, the default is 16777216 (16M) - - - `runtime_bloom_filter_size`: The default length of Bloom Filter in Runtime Filter, the default is 2097152 (2M) - - - `runtime_filter_max_in_num`: If the number of rows in the right table of the join is greater than this value, we will not generate an IN predicate, the default is 1024 - -The query options are further explained below. - -#### 1.runtime_filter_type -Type of Runtime Filter used. - -**Type**: Number (1, 2, 4, 8) or the corresponding mnemonic string (IN, BLOOM_FILTER, MIN_MAX, ```IN_OR_BLOOM_FILTER```), the default is 8 (```IN_OR_BLOOM_FILTER```), use multiple commas to separate, pay attention to the need to add quotation marks , Or add any number of types, for example: -``` -set runtime_filter_type="BLOOM_FILTER,IN,MIN_MAX"; -``` -Equivalent to: -``` -set runtime_filter_type=7; -``` - -**Precautions for use** - -- **IN or Bloom Filter**: According to the actual number of rows in the right table during execution, the system automatically determines whether to use IN predicate or Bloom Filter. - - By default, IN Predicate will be used when the number of data rows in the right table is less than 1024 (which can be adjusted by ` runtime_filter_max_in_num 'in the session variable). Otherwise, use bloom filter. -- **Bloom Filter**: There is a certain misjudgment rate, which results in the filtered data being a little less than expected, but it will not cause the final result to be inaccurate. In most cases, Bloom Filter can improve performance or has no significant impact on performance, but in some cases Under circumstances will cause performance degradation. - - Bloom Filter construction and application overhead is high, so when the filtering rate is low, or the amount of data in the left table is small, Bloom Filter may cause performance degradation. 
- - At present, only the Key column of the left table can be pushed down to the storage engine if the Bloom Filter is applied, and the test results show that the performance of the Bloom Filter is often reduced when the Bloom Filter is not pushed down to the storage engine. - - Currently Bloom Filter only has short-circuit logic when using expression filtering on ScanNode, that is, when the false positive rate is too high, the Bloom Filter will not continue to be used, but there is no short-circuit logic when the Bloom Filter is pushed down to the storage engine , So when the filtration rate is low, it may cause performance degradation. - -- **MinMax Filter**: Contains the maximum value and the minimum value, thereby filtering data smaller than the minimum value and greater than the maximum value. The filtering effect of the MinMax Filter is related to the type of the Key column in the join on clause and the data distribution of the left and right tables. - - When the type of the Key column in the join on clause is int/bigint/double, etc., in extreme cases, if the maximum and minimum values ​​of the left and right tables are the same, there is no effect, otherwise the maximum value of the right table is less than the minimum value of the left table, or the minimum of the right table The value is greater than the maximum value in the left table, the effect is best. - - When the type of the Key column in the join on clause is varchar, etc., applying the MinMax Filter will often cause performance degradation. - -- **IN predicate**: Construct IN predicate based on all the values ​​of Key listed in the join on clause on the right table, and use the constructed IN predicate to filter on the left table. Compared with Bloom Filter, the cost of construction and application is lower. The amount of data in the right table is lower. When it is less, it tends to perform better. - - By default, only the number of data rows in the right table is less than 1024 will be pushed down (can be adjusted by `runtime_filter_max_in_num` in the session variable). - - Currently IN predicate already implement a merge method. - - When IN predicate and other filters are specified at the same time, and the filtering value of IN predicate does not reach runtime_filter_max_in_num will try to remove other filters. The reason is that IN predicate is an accurate filtering condition. Even if there is no other filter, it can filter efficiently. If it is used at the same time, other filters will do useless work. Currently, only when the producer and consumer of the runtime filter are in the same fragment can there be logic to remove the Non-IN predicate. - -#### 2.runtime_filter_mode -Used to control the transmission range of Runtime Filter between instances. - -**Type**: Number (0, 1, 2) or corresponding mnemonic string (OFF, LOCAL, GLOBAL), default 2 (GLOBAL). - -**Precautions for use** - -LOCAL: Relatively conservative, the constructed Runtime Filter can only be used in the same Fragment on the same instance (the smallest unit of query execution), that is, the Runtime Filter producer (the HashJoinNode that constructs the Filter) and the consumer (the ScanNode that uses the RuntimeFilter) The same Fragment, such as the general scene of broadcast join; - -GLOBAL: Relatively radical. In addition to satisfying the scenario of the LOCAL strategy, the Runtime Filter can also be combined and transmitted to different Fragments on different instances via the network. 
For example, the Runtime Filter producer and consumer are in different Fragments, such as shuffle join. - -In most cases, the GLOBAL strategy can optimize queries in a wider range of scenarios, but in some shuffle joins, the cost of generating and merging Runtime Filters exceeds the performance advantage brought to the query, and you can consider changing to the LOCAL strategy. - -If the join query involved in the cluster does not improve performance due to Runtime Filter, you can change the setting to OFF to completely turn off the function. - -When building and applying Runtime Filters on different Fragments, the reasons and strategies for merging Runtime Filters can be found in [ISSUE 6116](https://github.com/apache/incubator-doris/issues/6116) - -#### 3.runtime_filter_wait_time_ms -Waiting for Runtime Filter is time consuming. - -**Type**: integer, default 1000, unit ms - -**Precautions for use** - -After the Runtime Filter is turned on, the ScanNode in the table on the left will wait for a period of time for each Runtime Filter assigned to itself before scanning the data, that is, if the ScanNode is assigned 3 Runtime Filters, it will wait at most 3000ms. - -Because it takes time to build and merge the Runtime Filter, ScanNode will try to push down the Runtime Filter that arrives within the waiting time to the storage engine. If the waiting time is exceeded, ScanNode will directly start scanning data using the Runtime Filter that has arrived. - -If the Runtime Filter arrives after ScanNode starts scanning, ScanNode will not push the Runtime Filter down to the storage engine. Instead, it will use expression filtering on ScanNode based on the Runtime Filter for the data that has been scanned from the storage engine. The scanned data will not apply the Runtime Filter, so the intermediate data size obtained will be larger than the optimal solution, but serious cracking can be avoided. - -If the cluster is busy and there are many resource-intensive or long-time-consuming queries on the cluster, consider increasing the waiting time to avoid missing optimization opportunities for complex queries. If the cluster load is light, and there are many small queries on the cluster that only take a few seconds, you can consider reducing the waiting time to avoid an increase of 1s for each query. - -#### 4.runtime_filters_max_num -The upper limit of the number of Bloom Filters in the Runtime Filter generated by each query. - -**Type**: integer, default 10 - -**Precautions for use** -Currently, only the number of Bloom Filters is limited, because the construction and application of Bloom Filters are more expensive than MinMax Filter and IN predicate. - -If the number of Bloom Filters generated exceeds the maximum allowable number, then the Bloom Filter with a large selectivity is retained. A large selectivity means that more rows are expected to be filtered. This setting can prevent Bloom Filter from consuming too much memory overhead and causing potential problems. -``` -Selectivity = (HashJoinNode Cardinality / HashJoinNode left child Cardinality) -- Because the cardinality of FE is currently inaccurate, the selectivity of Bloom Filter calculation here is inaccurate, so in the end, it may only randomly reserve part of Bloom Filter. -``` -This query option needs to be adjusted only when tuning some long-consuming queries involving joins between large tables. - -#### 5. 
Bloom Filter length related parameters -Including `runtime_bloom_filter_min_size`, `runtime_bloom_filter_max_size`, `runtime_bloom_filter_size`, used to determine the size (in bytes) of the Bloom Filter data structure used by the Runtime Filter. - -**Type**: Integer - -**Precautions for use** -Because it is necessary to ensure that the length of the Bloom Filter constructed by each HashJoinNode is the same to be merged, the length of the Bloom Filter is currently calculated in the FE query planning. - -If you can get the number of data rows (Cardinality) in the statistical information of the join right table, it will try to estimate the optimal size of the Bloom Filter based on Cardinality, and round to the nearest power of 2 (log value with the base 2). If the Cardinality of the table on the right cannot be obtained, the default Bloom Filter length `runtime_bloom_filter_size` will be used. `runtime_bloom_filter_min_size` and `runtime_bloom_filter_max_size` are used to limit the minimum and maximum length of the final Bloom Filter. - -Larger Bloom Filters are more effective when processing high-cardinality input sets, but require more memory. If the query needs to filter high cardinality columns (for example, containing millions of different values), you can consider increasing the value of `runtime_bloom_filter_size` for some benchmark tests, which will help make the Bloom Filter filter more accurate, so as to obtain the expected Performance improvement. - -The effectiveness of Bloom Filter depends on the data distribution of the query, so it is usually only for some specific queries to additionally adjust the length of the Bloom Filter, rather than global modification, generally only for some long time-consuming queries involving joins between large tables. Only when you need to adjust this query option. - -### View Runtime Filter generated by query - -The query plan that can be displayed by the `explain` command includes the join on clause information used by each Fragment, as well as comments on the generation and use of the Runtime Filter by the Fragment, so as to confirm whether the Runtime Filter is applied to the desired join on clause. -- The comment contained in the Fragment that generates the Runtime Filter, such as `runtime filters: filter_id[type] <- table.column`. -- Use the comment contained in the fragment of Runtime Filter such as `runtime filters: filter_id[type] -> table.column`. - -The query in the following example uses a Runtime Filter with ID RF000. 
-``` -CREATE TABLE test (t1 INT) DISTRIBUTED BY HASH (t1) BUCKETS 2 PROPERTIES("replication_num" = "1"); -INSERT INTO test VALUES (1), (2), (3), (4); - -CREATE TABLE test2 (t2 INT) DISTRIBUTED BY HASH (t2) BUCKETS 2 PROPERTIES("replication_num" = "1"); -INSERT INTO test2 VALUES (3), (4), (5); - -EXPLAIN SELECT t1 FROM test JOIN test2 where test.t1 = test2.t2; -+-------------------------------------------------------------------+ -| Explain String | -+-------------------------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS:`t1` | -| | -| 4:EXCHANGE | -| | -| PLAN FRAGMENT 1 | -| OUTPUT EXPRS: | -| PARTITION: HASH_PARTITIONED: `default_cluster:ssb`.`test`.`t1` | -| | -| 2:HASH JOIN | -| | join op: INNER JOIN (BUCKET_SHUFFLE) | -| | equal join conjunct: `test`.`t1` = `test2`.`t2` | -| | runtime filters: RF000[in] <- `test2`.`t2` | -| | | -| |----3:EXCHANGE | -| | | -| 0:OlapScanNode | -| TABLE: test | -| runtime filters: RF000[in] -> `test`.`t1` | -| | -| PLAN FRAGMENT 2 | -| OUTPUT EXPRS: | -| PARTITION: HASH_PARTITIONED: `default_cluster:ssb`.`test2`.`t2` | -| | -| 1:OlapScanNode | -| TABLE: test2 | -+-------------------------------------------------------------------+ --- The line of `runtime filters` above shows that `2:HASH JOIN` of `PLAN FRAGMENT 1` generates IN predicate with ID RF000, --- Among them, the key values of `test2`.`t2` are only known at runtime, --- This IN predicate is used in `0:OlapScanNode` to filter unnecessary data when reading `test`.`t1`. - -SELECT t1 FROM test JOIN test2 where test.t1 = test2.t2; --- Return 2 rows of results [3, 4]; - --- Through the query profile (set enable_profile=true;) you can view the detailed information of the internal work of the query, --- Including whether each Runtime Filter is pushed down, waiting time, --- and the total time from prepare to receiving Runtime Filter for OLAP_SCAN_NODE. -RuntimeFilter:in: - - HasPushDownToEngine: true - - AWaitTimeCost: 0ns - - EffectTimeCost: 2.76ms - --- In addition, in the OLAP_SCAN_NODE of the profile, you can also view the filtering effect --- and time consumption after the Runtime Filter is pushed down. - - RowsVectorPredFiltered: 9.320008M (9320008) - - VectorPredEvalTime: 364.39ms -``` - -## Runtime Filter planning rules -1. Only support the generation of Runtime Filter for the equivalent conditions in the join on clause, excluding the Null-safe condition, because it may filter out the null value of the join left table. -2. Does not support pushing down Runtime Filter to the left table of left outer, full outer, and anti join; -3. Does not support src expr or target expr is constant; -4. The equality of src expr and target expr is not supported; -5. The type of src expr is not supported to be equal to `HLL` or `BITMAP`; -6. Currently only supports pushing down Runtime Filter to OlapScanNode; -7. Target expr does not support NULL-checking expressions, such as `COALESCE/IFNULL/CASE`, because when the join on clause of other joins at the upper level of the outer join contains NULL-checking expressions and a Runtime Filter is generated, this Runtime Filter is downloaded Pushing to the left table of outer join may cause incorrect results; -8. The column (slot) in target expr is not supported, and an equivalent column cannot be found in the original table; -9. Column conduction is not supported. 
This includes two cases: - - First, when the join on clause contains A.k = B.k and B.k = C.k, currently C.k can only be pushed down to B.k, but not to A.k; - - Second, for example, the join on clause contains Aa + Bb = Cc. If Aa can be transmitted to Ba, that is, Aa and Ba are equivalent columns, then you can replace Aa with Ba, and then you can try to push the Runtime Filter down to B ( If Aa and Ba are not equivalent columns, they cannot be pushed down to B, because target expr must be bound to the only join left table); -10. The types of Target expr and src expr must be equal, because Bloom Filter is based on hash, if the types are not equal, it will try to convert the type of target expr to the type of src expr; -11. The Runtime Filter generated by `PlanNode.Conjuncts` is not supported. Unlike HashJoinNode's `eqJoinConjuncts` and `otherJoinConjuncts`, the Runtime Filter generated by `PlanNode.Conjuncts` found in the test that it may cause incorrect results, such as ` When an IN` subquery is converted to a join, the automatically generated join on clause will be stored in `PlanNode.Conjuncts`. At this time, applying Runtime Filter may result in missing some rows in the result. diff --git a/docs/en/administrator-guide/small-file-mgr.md b/docs/en/administrator-guide/small-file-mgr.md deleted file mode 100644 index 2a8226edda..0000000000 --- a/docs/en/administrator-guide/small-file-mgr.md +++ /dev/null @@ -1,104 +0,0 @@ ---- -{ - "title": "File Manager", - "language": "en" -} ---- - - - -# File Manager - -Some functions in Doris require some user-defined files. For example, public keys, key files, certificate files and so on are used to access external data sources. The File Manager provides a function that allows users to upload these files in advance and save them in Doris system, which can then be referenced or accessed in other commands. - -## Noun Interpretation - -* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. -* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. -* BDBJE: Oracle Berkeley DB Java Edition. Distributed embedded database for persistent metadata in FE. -* SmallFileMgr: File Manager. Responsible for creating and maintaining user files. - -## Basic concepts - -Files are files created and saved by users in Doris. - -A file is located by `database`, `catalog`, `file_name`. At the same time, each file also has a globally unique ID (file_id), which serves as the identification in the system. - -File creation and deletion can only be performed by users with `admin` privileges. A file belongs to a database. Users who have access to a database (queries, imports, modifications, etc.) can use the files created under the database. - -## Specific operation - -File management has three main commands: `CREATE FILE`, `SHOW FILE` and `DROP FILE`, creating, viewing and deleting files respectively. The specific syntax of these three commands can be viewed by connecting to Doris and executing `HELP cmd;`. - -1. CREATE FILE - - In the command to create a file, the user must provide the following information: - - * file_name: File name. User-defined, unique within a catalog. - * Catalog: Category of files. User-defined, unique within a database. - - > Doris also has some special classification names for specific commands. - - > 1. 
Kafka - - > When the data source is specified as Kafka in the routine Import command and the file needs to be referenced, Doris defaults to looking for the file from the catalog category named "kafka". - - * url: the download address of the file. Currently, only unauthenticated HTTP download addresses are supported. This download address is only used to download files from this address when executing the create file command. When the file is successfully created and saved in Doris, the address will no longer be used. - * md5: optional. The MD5 value of the file. If the user provides this value, the MD5 value will be checked after the file is downloaded. File creation fails if validation fails. - - When the file is created successfully, the file-related information will be persisted in Doris. Users can view successfully created files through the `SHOW FILE` command. - -2. SHOW FILE - - This command allows you to view files that have been created successfully. Specific operations see: `HELP SHOW FILE;` - -3. DROP FILE - - This command can delete a file that has been created. Specific operations see: `HELP DROP FILE;` - -## Implementation details - -### Create and delete files - -When the user executes the `CREATE FILE` command, FE downloads the file from a given URL. The contents of the file are stored in FE memory directly in the form of Base64 encoding. At the same time, the file content and meta-information related to the file will be persisted in BDBJE. All created files, their meta-information and file content reside in FE memory. If the FE goes down and restarts, meta information and file content will also be loaded into memory from the BDBJE. When a file is deleted, the relevant information is deleted directly from FE memory and persistent information is deleted from BDBJE. - -### Use of documents - -If the FE side needs to use the created file, SmallFileMgr will directly save the data in FE memory as a local file, store it in the specified directory, and return the local file path for use. - -If the BE side needs to use the created file, BE will download the file content to the specified directory on BE through FE's HTTP interface `api/get_small_file` for use. At the same time, BE also records the information of the files that have been downloaded in memory. When BE requests a file, it first checks whether the local file exists and verifies it. If the validation passes, the local file path is returned directly. If the validation fails, the local file is deleted and downloaded from FE again. When BE restarts, local files are preloaded into memory. - -## Use restrictions - -Because the file meta-information and content are stored in FE memory. So by default, only files with size less than 1MB can be uploaded. And the total number of files is limited to 100. The configuration items described in the next section can be modified. - -## Relevant configuration - -1. FE configuration - -* `Small_file_dir`: The path used to store uploaded files, defaulting to the `small_files/` directory of the FE runtime directory. -* `max_small_file_size_bytes`: A single file size limit in bytes. The default is 1MB. File creation larger than this configuration will be rejected. -* `max_small_file_number`: The total number of files supported by a Doris cluster. The default is 100. When the number of files created exceeds this value, subsequent creation will be rejected. 
- - > If you need to upload more files or increase the size limit of a single file, you can modify the `max_small_file_size_bytes` and `max_small_file_number` parameters by using the `ADMIN SET CONFIG` command. However, the increase in the number and size of files will lead to an increase in FE memory usage. - -2. BE configuration - -* `Small_file_dir`: The path used to store files downloaded from FE by default is in the `lib/small_files/` directory of the BE runtime directory. diff --git a/docs/en/administrator-guide/sql-mode.md b/docs/en/administrator-guide/sql-mode.md deleted file mode 100644 index 90ad36783c..0000000000 --- a/docs/en/administrator-guide/sql-mode.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -{ - "title": "SQL MODE", - "language": "en" -} ---- - - - -# SQL MODE - -The SQL MODE supported by Doris refers to the sql mode management mechanism of MySQL. Each client can set its own sql mode, and the database administrator with admin permission can set the global sql mode. - -## Sql mode introduction - -SQL MODE enables users to switch between different styles of SQL syntax and data verification strictness, making Doris more compatible with other databases. For example, in some databases, the '||' symbol is a string connector, but in Doris it is equivalent to 'or'. At this time, users only need to use SQL mode to switch to the style they want. Each client can set sql mode, which is valid in the current conversation. Only users with admin permission can set global SQL mode. - -## Theory - -SQL MODE is stored in session variables with a 64 bit long type. Each bit of this address represents the on / off (1 for on, 0 for off) state of a mode. As long as we know the specific bit of each mode, we can easily and quickly verify and operate SQL mode through bit operation. - -Every time you query sql mode, the long type will be parsed into a user-readable string. Similarly, the sql mode string sent by the user to the server will be parsed into a long type that can be stored in session variables. - -The set global sql mode will be persisted, so the operation on the global sql mode is always only once, even after the program is restarted, the last global sql mode can be recovered. - -## Operation - -1、set sql mode - -``` -set global sql_mode = "" -set session sql_mode = "" -``` ->At present, Doris's default sql mode is empty. ->Setting global sql mode requires admin permission and affects all clients that connect later. ->Setting session sql mode will only affect the current conversation client. The default setting way is session. - -2、select sql mode - -``` -select @@global.sql_mode -select @@session.sql_mode -``` ->In addition to this method, you can also view the current sql mode by returning all session variables as follows - -``` -show global variables -show session variables -``` - -## supported mode - -1. `PIPES_AS_CONCAT` - - Treat '||' as a string concatenation operator (same as CONCAT()) rather than as a synonym for OR. (e.g., `'a'||'b' = 'ab'`, `1||0 = '10'`) - -## combine mode - -(Work in progress) \ No newline at end of file diff --git a/docs/en/administrator-guide/time-zone.md b/docs/en/administrator-guide/time-zone.md deleted file mode 100644 index 25110b9630..0000000000 --- a/docs/en/administrator-guide/time-zone.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -{ - "title": "Time zone", - "language": "en" -} ---- - - - -# Time zone - -Doris supports multiple time zone settings - -## Noun Interpretation - -* FE: Frontend, the front-end node of Doris. 
-Responsible for metadata management and request access.
-* BE: Backend, Doris's back-end node. Responsible for query execution and data storage.
-
-## Basic concepts
-
-There are multiple time-zone-related parameters in Doris:
-
-* `system_time_zone`:
-
-  Set automatically according to the machine's time zone when the server starts; it cannot be modified afterwards.
-
-* `time_zone`:
-
-  The current time zone of the server. It can be set at the session level or the global level.
-
-## Specific operations
-
-1. `SHOW VARIABLES LIKE '%time_zone%'`
-
-    View the current time-zone-related configuration.
-
-2. `SET time_zone = 'Asia/Shanghai'`
-
-    This command sets the session-level time zone, which no longer takes effect after disconnection.
-
-3. `SET global time_zone = 'Asia/Shanghai'`
-
-    This command sets the time zone parameter at the global level. The FE persists the parameter, and it remains in effect after the connection is disconnected.
-
-### Impact of time zone
-
-The time zone setting affects the display and storage of time-zone-sensitive values.
-
-This includes the values displayed by time functions such as `NOW()` or `CURTIME()`, as well as the time values in `SHOW LOAD` and `SHOW BACKENDS` statements.
-
-However, it does not affect the `LESS THAN VALUE` of a time-type partition column in the `CREATE TABLE` statement, nor does it affect the display of values stored as the `DATE/DATETIME` types.
-
-Functions affected by the time zone:
-
-* `FROM_UNIXTIME`: Given a UTC timestamp, returns the date and time in the specified time zone. For example, `FROM_UNIXTIME(0)` returns `1970-01-01 08:00:00` in the CST time zone.
-
-* `UNIX_TIMESTAMP`: Given a date and time in the specified time zone, returns the UTC timestamp. For example, `UNIX_TIMESTAMP('1970-01-01 08:00:00')` in the CST time zone returns `0`.
-
-* `CURTIME`: Returns the current time in the specified time zone.
-
-* `NOW`: Returns the current date and time in the specified time zone.
-
-* `CONVERT_TZ`: Converts a date and time from one specified time zone to another.
-
-## Restrictions
-
-Time zone values can be given in several formats, case-insensitive:
-
-* A string representing a UTC offset, such as '+10:00' or '-6:00'.
-
-* Standard time zone formats, such as "Asia/Shanghai", "America/Los_Angeles".
-
-* Abbreviated time zone formats such as MET and CTT are not supported, because abbreviated time zones are ambiguous in different scenarios, and their use is not recommended.
-
-* In order to be compatible and support the CST abbreviated time zone, Doris internally converts CST to "Asia/Shanghai", which is the Chinese standard time zone.
-
-## Time zone format list
-
-[List of TZ database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
-
-[Edit on GitHub](https://github.com/apache/incubator-doris/blob/master/docs/documentation/en/administrator-guide/time-zone_EN.md)
\ No newline at end of file
diff --git a/docs/en/administrator-guide/update.md b/docs/en/administrator-guide/update.md
deleted file mode 100644
index b3efc45ece..0000000000
--- a/docs/en/administrator-guide/update.md
+++ /dev/null
@@ -1,126 +0,0 @@
----
-{
-    "title": "update",
-    "language": "en"
-}
----
-
-# Update
-
-If we need to modify or update data in Doris, we can use the UPDATE command.
-
-## Applicable scenarios
-
-+ Modify the value of rows that meet certain conditions.
-+ Point updates or small-range updates, where the rows to be updated are preferably a very small part of the entire table.
-+ Can only be used on tables of the Unique model.
-
-## Explanation of terms
-
-1.
Unique model: A data model in the Doris system. When the user imports rows with the same Key, the Value of the latter overrides the existing Value, in the same sense as Unique in Mysql. - -## Fundamentals - -Use the query engine's own where filtering logic to filter the rows that need to be updated from the table to be updated. Then use the Unique model's own Value column replacement logic to change the rows to be updated and reinsert them into the table. This enables row-level updates. - -### Example - -Suppose there is an order table in Doris, where order id is the Key column, order status, and order amount are the Value columns. The data state is as follows. - -| order id | order amount | order status | -|--|--|--| -| 1 | 100| Pending Payment | - -At this time, after the user clicks the payment, Doris system needs to change the order id to '1' order status to 'pending shipment', you need to use the Update function. - -``` -UPDATE order SET order status='To be shipped' WHERE order id=1; -``` - -After the user executes the UPDATE command, the system performs the following three steps. - -+ Step 1: Read the rows that satisfy WHERE order id=1 - (1, 100, 'pending payment') -+ Step 2: Change the order status of the row from 'Pending Payment' to 'Pending Shipping' - (1, 100, 'Pending shipment') -+ Step 3: Insert the updated row back into the table to achieve the updated effect. - | order id | order amount | order status | - | ---| ---| ---| - | 1 | 100| Pending Payment | - | 1 | 100 | Pending shipments | - Since the table order is a UNIQUE model, the rows with the same Key, after which the latter will take effect, so the final effect is as follows. - | order id | order amount | order status | - |--|--|--| - | 1 | 100 | Pending shipments | - -## Basic operations - -### UPDATE syntax - -```UPDATE table_name SET value=xxx WHERE condition;``` - -+ ``table_name``: the table to be updated, must be a UNIQUE model table to update. - -+ value=xxx: The column to be updated, the left side of the equation must be the value column of the table. The right side of the equation can be a constant or an expression transformation of a column in a table. - For example, if value = 1, then the value of the column to be updated will be 1. - For example, if value = value + 1, the value of the column to be updated is incremented by 1. - -+ condition: Only rows that satisfy the condition will be updated. condition must be an expression that results in a Boolean type. - For example, if k1 = 1, only rows with a k1 column value of 1 will be updated. - For example, if k1 = k2, only rows with the same value in column k1 as in column k2 will be updated. - No support for unfilled condition, i.e., no support for full table updates. - -### Synchronization - -The Update syntax is a synchronization syntax in Doris. If the Update statement succeeds, the update succeeds and the data is visible. - -### Performance - -The performance of the Update statement is closely related to the number of rows to be updated and the retrieval efficiency of the condition. - -+ Number of rows to be updated: The more rows to be updated, the slower the Update statement will be. This is consistent with the principle of importing. - Doris updates are more suitable for occasional update scenarios, such as changing the values of individual rows. - Doris is not suitable for large batches of data changes. Large modifications can make Update statements take a long time to run. 
- -+ Condition retrieval efficiency: Doris Update implements the principle of reading the rows that satisfy the condition first, so if the condition retrieval efficiency is high, the Update will be faster. - The condition column should ideally be hit, indexed, or bucket clipped. This way Doris does not need to scan the entire table and can quickly locate the rows that need to be updated. This improves update efficiency. - It is strongly discouraged to include the UNIQUE model value column in the condition column. - -### Concurrency Control - -By default, multiple concurrent Update operations on the same table are not allowed at the same time. - -The main reason for this is that Doris currently supports row updates, which means that even if the user declares ``SET v2 = 1``, virtually all other Value columns will be overwritten (even though the values are not changed). - -This presents a problem in that if two Update operations update the same row at the same time, the behavior may be indeterminate. That is, there may be dirty data. - -However, in practice, the concurrency limit can be turned on manually if the user himself can guarantee that even if concurrent updates are performed, they will not operate on the same row at the same time. This is done by modifying the FE configuration ``enable_concurrent_update``. When the configuration value is true, there is no limit on concurrent updates. - -## Risks of use - -Since Doris currently supports row updates and uses a two-step read-and-write operation, there is uncertainty about the outcome of an Update statement if it modifies the same row as another Import or Delete statement. - -Therefore, when using Doris, you must be careful to control the concurrency of Update statements and other DML statements on the *user side itself*. - -## Version - -Doris Version 0.15.x + diff --git a/docs/en/administrator-guide/variables.md b/docs/en/administrator-guide/variables.md deleted file mode 100644 index e6d6e22d28..0000000000 --- a/docs/en/administrator-guide/variables.md +++ /dev/null @@ -1,499 +0,0 @@ ---- -{ - "title": "Variable", - "language": "en" -} ---- - - - -# Variable - -This document focuses on currently supported variables. - -Variables in Doris refer to variable settings in MySQL. However, some of the variables are only used to be compatible with some MySQL client protocols, and do not produce their actual meaning in the MySQL database. - -## Variable setting and viewing - -### View - -All or specified variables can be viewed via `SHOW VARIABLES [LIKE 'xxx'];`. Such as: - -``` -SHOW VARIABLES; -SHOW VARIABLES LIKE '%time_zone%'; -``` - -### Settings - -Some variables can be set at global-level or session-only. For global-level, the set value will be used in subsequent new session connections. For session-only, the variable only works for the current session. - -For session-only, set by the `SET var_name=xxx;` statement. Such as: - -``` -SET exec_mem_limit = 137438953472; -SET forward_to_master = true; -SET time_zone = "Asia/Shanghai"; -``` - -For global-level, set by `SET GLOBAL var_name=xxx;`. Such as: - -``` -SET GLOBAL exec_mem_limit = 137438953472 -``` - -> Note 1: Only ADMIN users can set variable at global-level. -> Note 2: Global-level variables do not affect variable values in the current session, only variables in new sessions. 
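-
-As a minimal sketch of the behavior described in Note 2, using `query_timeout` from the list below (the numeric values here are illustrative only):
-
-```
-SHOW VARIABLES LIKE 'query_timeout';   -- e.g. 300 in the current session
-SET GLOBAL query_timeout = 600;        -- requires ADMIN, see Note 1
-SHOW VARIABLES LIKE 'query_timeout';   -- still 300: the current session keeps its old value
-```
-
-A session opened after the `SET GLOBAL` statement will start with `query_timeout = 600`.
-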
- -Variables that support both session-level and global-level setting include: - -* `time_zone` -* `wait_timeout` -* `sql_mode` -* `enable_profile` -* `query_timeout` -* `exec_mem_limit` -* `batch_size` -* `parallel_fragment_exec_instance_num` -* `parallel_exchange_instance_num` -* `allow_partition_column_nullable` -* `insert_visible_timeout_ms` -* `enable_fold_constant_by_be` - -Variables that support only global-level setting include: - -* `default_rowset_type` - -At the same time, variable settings also support constant expressions. Such as: - -``` -SET exec_mem_limit = 10 * 1024 * 1024 * 1024; -SET forward_to_master = concat('tr', 'u', 'e'); -``` - -### Set variables in the query statement - -In some scenarios, we may need to set variables specifically for certain queries. -The SET_VAR hint sets the session value of a system variable temporarily (for the duration of a single statement). Examples: - -``` -SELECT /*+ SET_VAR(exec_mem_limit = 8589934592) */ name FROM people ORDER BY name; -SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3); -``` - -Note that the comment must start with /*+ and can only follow the SELECT. - -## Supported variables - -* `SQL_AUTO_IS_NULL` - - Used for compatibility with the JDBC connection pool C3P0. No practical effect. - -* `auto_increment_increment` - - Used for compatibility with MySQL clients. No practical effect. - -* `autocommit` - - Used for compatibility with MySQL clients. No practical effect. - -* `batch_size` - - Used to specify the number of rows of a single packet transmitted by each node during query execution. By default, the number of rows of a packet is 1024 rows. That is, after the source node generates 1024 rows of data, it is packaged and sent to the destination node. - - A larger number of rows will increase the throughput of the query in the case of scanning large data volumes, but may increase the query delay in small query scenarios. At the same time, it also increases the memory overhead of the query. The recommended setting range is 1024 to 4096. - -* `character_set_client` - - Used for compatibility with MySQL clients. No practical effect. - -* `character_set_connection` - - Used for compatibility with MySQL clients. No practical effect. - -* `character_set_results` - - Used for compatibility with MySQL clients. No practical effect. - -* `character_set_server` - - Used for compatibility with MySQL clients. No practical effect. - -* `codegen_level` - - Used to set the level of LLVM codegen. (Not currently in effect). - -* `collation_connection` - - Used for compatibility with MySQL clients. No practical effect. - -* `collation_database` - - Used for compatibility with MySQL clients. No practical effect. - -* `collation_server` - - Used for compatibility with MySQL clients. No practical effect. - -* `delete_without_partition` - - When set to true, no partition needs to be specified when using the delete command to delete data from a partitioned table; the delete operation will be automatically applied to all partitions. - - Note, however, that applying the delete to all partitions may trigger a large number of subtasks and therefore take a long time. If it is not necessary, it is not recommended to turn it on. - -* `disable_colocate_join` - - Controls whether the [Colocation Join](./colocation-join.md) function is enabled. The default is false, which means that the feature is enabled. True means that the feature is disabled. When this feature is disabled, the query plan will not attempt to perform a Colocation Join. - - -* `enable_bucket_shuffle_join` - - Controls whether the [Bucket Shuffle Join](./bucket-shuffle-join.md) function is enabled. The default is true, which means that the feature is enabled. False means that the feature is disabled. When this feature is disabled, the query plan will not attempt to perform a Bucket Shuffle Join. - -* `disable_streaming_preaggregations` - - Controls whether streaming pre-aggregation is turned on. The default is false, which means it is enabled. Currently it is not configurable and is enabled by default. - -* `enable_insert_strict` - - Used to set the `strict` mode when loading data via INSERT statement. The default is false, which means that the `strict` mode is not turned on. For an introduction to this mode, see [here](./load-data/insert-into-manual.md). - -* `enable_spilling` - - Used to set whether to enable external sorting. The default is false, which turns off the feature. This feature is enabled when the user does not specify a LIMIT condition for the ORDER BY clause and also sets `enable_spilling` to true. When this feature is enabled, the temporary data is stored in the `doris-scratch/` directory of the BE data directory and the temporary data is cleared after the query is completed. - - This feature is mainly used for sorting operations with large amounts of data using limited memory. - - Note that this feature is experimental and does not guarantee stability. Please turn it on carefully. - -* `exec_mem_limit` - - Used to set the memory limit for a single query. The default is 2GB. You can set it in B/K/KB/M/MB/G/GB/T/TB/P/PB; the default unit is B. - - This parameter is used to limit the memory that can be used by an instance of a single query fragment in a query plan. A query plan may have multiple instances, and a BE node may execute one or more instances. Therefore, this parameter does not accurately limit the memory usage of a query across the cluster, nor does it accurately limit the memory usage of a query on a single BE node. The actual usage needs to be judged according to the generated query plan. - - Usually, only some blocking nodes (such as sorting node, aggregation node, and join node) consume more memory, while in other nodes (such as scan node), data is streamed through and does not occupy much memory. - - When a `Memory Exceed Limit` error occurs, you can try to increase the parameter exponentially, such as 4G, 8G, 16G, and so on. - -* `forward_to_master` - - Used to set whether to forward some commands to the Master FE node for execution. The default is `true`, which means forwarding is enabled. There are multiple FE nodes in Doris, one of which is the Master node. Usually users can connect to any FE node for full-featured operation. However, some detailed information can only be obtained from the Master FE node. - - For example, the `SHOW BACKENDS;` command, if not forwarded to the Master FE node, can only show some basic information such as whether the node is alive, while forwarding to the Master FE returns more detailed information, including the node startup time and the last heartbeat time. - - The commands currently affected by this parameter are as follows: - - 1. `SHOW FRONTENDS;` - - Forward to Master to view the last heartbeat information. - - 2. `SHOW BACKENDS;` - - Forward to Master to view startup time, last heartbeat information, and disk capacity information. - - 3. `SHOW BROKERS;` - - Forward to Master to view the start time and last heartbeat information.
- - 4. `SHOW TABLET;`/`ADMIN SHOW REPLICA DISTRIBUTION;`/`ADMIN SHOW REPLICA STATUS;` - - Forward to Master to view the tablet information stored in the Master FE metadata. Under normal circumstances, the tablet information in different FE metadata should be consistent. When a problem occurs, this method can be used to compare the difference between the current FE and Master FE metadata. - - 5. `SHOW PROC;` - - Forward to Master to view information about the relevant PROC stored in the Master FE metadata. Mainly used for metadata comparison. - -* `init_connect` - - Used for compatibility with MySQL clients. No practical effect. - -* `interactive_timeout` - - Used for compatibility with MySQL clients. No practical effect. - -* `enable_profile` - - Used to set whether you need to view the profile of the query. The default is false, which means no profile is required. - - By default, the BE sends a profile to the FE for viewing errors only if an error occurs in the query. A successful query will not send a profile. Sending a profile will incur a certain amount of network overhead, which is detrimental to a high concurrent query scenario. - - When the user wants to analyze the profile of a query, the query can be sent after this variable is set to true. After the query is finished, you can view the profile on the web page of the currently connected FE: - - `fe_host:fe_http:port/query` - - It will display the most recent 100 queries which `enable_profile` is set to true. - -* `language` - - Used for compatibility with MySQL clients. No practical effect. - -* `license` - - Show Doris's license. No other effect. - -* `load_mem_limit` - - Used to specify the memory limit of the load operation. The default is 0, which means that this variable is not used, and `exec_mem_limit` is used as the memory limit for the load operation. - - This variable is usually used for INSERT operations. Because the INSERT operation has both query and load part. If the user does not set this variable, the respective memory limits of the query and load part are `exec_mem_limit`. Otherwise, the memory of query part of INSERT is limited to `exec_mem_limit`, and the load part is limited to` load_mem_limit`. - - For other load methods, such as BROKER LOAD, STREAM LOAD, the memory limit still uses `exec_mem_limit`. - -* `lower_case_table_names` - - Used to control whether the user table name is case-sensitive. - - A value of 0 makes the table name case-sensitive. The default is 0. - - When the value is 1, the table name is case insensitive. Doris will convert the table name to lowercase when storing and querying. - The advantage is that any case of table name can be used in one statement. The following SQL is correct: - ``` - mysql> show tables; - +------------------+ - | Tables_ in_testdb| - +------------------+ - | cost | - +------------------+ - mysql> select * from COST where COst.id < 100 order by cost.id; - ``` - The disadvantage is that the table name specified in the table creation statement cannot be obtained after table creation. The table name viewed by 'show tables' is lower case of the specified table name. - - When the value is 2, the table name is case insensitive. Doris stores the table name specified in the table creation statement and converts it to lowercase for comparison during query. - The advantage is that the table name viewed by 'show tables' is the table name specified in the table creation statement; - The disadvantage is that only one case of table name can be used in the same statement. 
For example, a table created as 'cost' can be queried as 'COST', as long as the same case is used consistently throughout the statement: - ``` - mysql> select * from COST where COST.id < 100 order by COST.id; - ``` - - This variable is compatible with MySQL and must be configured at cluster initialization by specifying `lower_case_table_names=` in fe.conf. It cannot be modified by the `set` statement after cluster initialization is complete, nor can it be modified by restarting or upgrading the cluster. - - The system view table names in information_schema are case-insensitive and behave as if `lower_case_table_names` were set to 2 when its value is 0. - -* `max_allowed_packet` - - Used for compatibility with the JDBC connection pool C3P0. No practical effect. - -* `max_pushdown_conditions_per_column` - - For the specific meaning of this variable, please refer to the description of `max_pushdown_conditions_per_column` in [BE Configuration](./config/be_config.md). This variable is set to -1 by default, which means that the configuration value in `be.conf` is used. If the setting is greater than 0, the query in the current session will use the variable value and ignore the configuration value in `be.conf`. - -* `max_scan_key_num` - - For the specific meaning of this variable, please refer to the description of `doris_max_scan_key_num` in [BE Configuration](./config/be_config.md). This variable is set to -1 by default, which means that the configuration value in `be.conf` is used. If the setting is greater than 0, the query in the current session will use the variable value and ignore the configuration value in `be.conf`. - -* `net_buffer_length` - - Used for compatibility with MySQL clients. No practical effect. - -* `net_read_timeout` - - Used for compatibility with MySQL clients. No practical effect. - -* `net_write_timeout` - - Used for compatibility with MySQL clients. No practical effect. - -* `parallel_exchange_instance_num` - - Used to set the number of exchange nodes used by an upper node to receive data from the lower nodes in the execution plan. The default is -1, which means that the number of exchange nodes is equal to the number of execution instances of the lower nodes (default behavior). When the setting is greater than 0 and less than the number of execution instances of the lower nodes, the number of exchange nodes is equal to the set value. - - In a distributed query execution plan, the upper node usually has one or more exchange nodes for receiving data from the execution instances of the lower nodes on different BEs. Usually the number of exchange nodes is equal to the number of execution instances of the lower nodes. - - In some aggregate query scenarios, if the amount of data to be scanned at the bottom is large, but the amount of data after aggregation is small, you can try to modify this variable to a smaller value, which can reduce the resource overhead of such queries. For example, aggregation queries on the DUPLICATE KEY data model. - -* `parallel_fragment_exec_instance_num` - - For the scan node, sets its number of instances to execute on each BE node. The default is 1. - - A query plan typically produces a set of scan ranges, i.e. the ranges of data that need to be scanned. This data is distributed across multiple BE nodes, and a BE node will have one or more scan ranges. By default, a set of scan ranges for each BE node is processed by only one execution instance.
When the machine resources are abundant, you can increase the variable and let more execution instances process a set of scan ranges at the same time, thus improving query efficiency. - - The number of scan instances determines the number of other execution nodes in the upper layer, such as aggregate nodes and join nodes. Therefore, it is equivalent to increasing the concurrency of the entire query plan execution. Modifying this parameter will help improve the efficiency of large queries, but larger values will consume more machine resources, such as CPU, memory, and disk IO. - -* `query_cache_size` - - Used for compatibility with MySQL clients. No practical effect. - -* `query_cache_type` - - Used for compatible JDBC connection pool C3P0. No practical effect. - -* `query_timeout` - - Used to set the query timeout. This variable applies to all query statements in the current connection, as well as INSERT statements. The default is 5 minutes, in seconds. - -* `resource_group` - - Not used. - -* `send_batch_parallelism` - - Used to set the default parallelism for sending batch when execute InsertStmt operation, if the value for parallelism exceed `max_send_batch_parallelism_per_job` in BE config, then the coordinator BE will use the value of `max_send_batch_parallelism_per_job`. - -* `sql_mode` - - Used to specify SQL mode to accommodate certain SQL dialects. For the SQL mode, see [here](./sql-mode.md). - -* `sql_safe_updates` - - Used for compatibility with MySQL clients. No practical effect. - -* `sql_select_limit` - - Used for compatibility with MySQL clients. No practical effect. - -* `system_time_zone` - - Displays the current system time zone. Cannot be changed. - -* `time_zone` - - Used to set the time zone of the current session. The time zone has an effect on the results of certain time functions. For the time zone, see [here](./time-zone.md). - -* `tx_isolation` - - Used for compatibility with MySQL clients. No practical effect. - -* `tx_read_only` - - Used for compatibility with MySQL clients. No practical effect. - -* `transaction_read_only` - - Used for compatibility with MySQL clients. No practical effect. - -* `transaction_isolation` - - Used for compatibility with MySQL clients. No practical effect. - -* `version` - - Used for compatibility with MySQL clients. No practical effect. - -* `performance_schema` - - Used for compatibility with MySQL JDBC 8.0.16 or later version. No practical effect. - -* `version_comment` - - Used to display the version of Doris. Cannot be changed. - -* `wait_timeout` - - The length of the connection used to set up an idle connection. When an idle connection does not interact with Doris for that length of time, Doris will actively disconnect the link. The default is 8 hours, in seconds. - -* `default_rowset_type` - - Used for setting the default storage format of Backends storage engine. Valid options: alpha/beta - -* `use_v2_rollup` - - Used to control the sql query to use segment v2 rollup index to get data. This variable is only used for validation when upgrading to segment v2 feature. Otherwise, not recommended to use. - -* `rewrite_count_distinct_to_bitmap_hll` - - Whether to rewrite count distinct queries of bitmap and HLL types as bitmap_union_count and hll_union_agg. - -* `prefer_join_method` - - When choosing the join method(broadcast join or shuffle join), if the broadcast join cost and shuffle join cost are equal, which join method should we prefer. - - Currently, the optional values for this variable are "broadcast" or "shuffle". 
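As a quick illustration of how the execution-related variables described above are typically applied (a minimal sketch; the table `t1` is a placeholder and the values are examples, not recommendations):

```
-- per-session tuning for a batch of large queries
SET parallel_fragment_exec_instance_num = 8;
SET exec_mem_limit = 8589934592;

-- or limit the change to a single statement via the SET_VAR hint
SELECT /*+ SET_VAR(exec_mem_limit = 8589934592) */ COUNT(*) FROM t1;
```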
- -* `allow_partition_column_nullable` - - Whether to allow the partition column to be NULL when creating the table. The default is true, which means NULL is allowed. false means the partition column must be defined as NOT NULL. - -* `insert_visible_timeout_ms` - - When execute insert statement, doris will wait for the transaction to commit and visible after the import is completed. - This parameter controls the timeout of waiting for transaction to be visible. The default value is 10000, and the minimum value is 1000. - -* `enable_exchange_node_parallel_merge` - - In a sort query, when an upper level node receives the ordered data of the lower level node, it will sort the corresponding data on the exchange node to ensure that the final data is ordered. However, when a single thread merges multiple channels of data, if the amount of data is too large, it will lead to a single point of exchange node merge bottleneck. - - Doris optimizes this part if there are too many data nodes in the lower layer. Exchange node will start multithreading for parallel merging to speed up the sorting process. This parameter is false by default, which means that exchange node does not adopt parallel merge sort to reduce the extra CPU and memory consumption. - -* `extract_wide_range_expr` - - Used to control whether turn on the 'Wide Common Factors' rule. The value has two: true or false. On by default. - -* `enable_fold_constant_by_be` - - Used to control the calculation method of constant folding. The default is `false`, that is, calculation is performed in `FE`; if it is set to `true`, it will be calculated by `BE` through `RPC` request. - -* `cpu_resource_limit` - - Used to limit the resource overhead of a query. This is an experimental feature. The current implementation is to limit the number of scan threads for a query on a single node. The number of scan threads is limited, and the data returned from the bottom layer slows down, thereby limiting the overall computational resource overhead of the query. Assuming it is set to 2, a query can use up to 2 scan threads on a single node. - - This parameter will override the effect of `parallel_fragment_exec_instance_num`. That is, assuming that `parallel_fragment_exec_instance_num` is set to 4, and this parameter is set to 2. Then 4 execution instances on a single node will share up to 2 scanning threads. - - This parameter will be overridden by the `cpu_resource_limit` configuration in the user property. - - The default is -1, which means no limit. - -* `disable_join_reorder` - - Used to turn off all automatic join reorder algorithms in the system. There are two values: true and false.It is closed by default, that is, the automatic join reorder algorithm of the system is adopted. After set to true, the system will close all automatic sorting algorithms, adopt the original SQL table order, and execute join - -* `enable_infer_predicate` - - Used to control whether to perform predicate derivation. There are two values: true and false. It is turned off by default, that is, the system does not perform predicate derivation, and uses the original predicate to perform related operations. After it is set to true, predicate expansion is performed. - -* `return_object_data_as_binary` - Used to identify whether to return the bitmap/hll result in the select result. 
In the select into outfile statement, if the export file format is csv, the bimap/hll data will be base64-encoded, if it is the parquet file format, the data will be stored as a byte array - -* `block_encryption_mode` - The block_encryption_mode variable controls the block encryption mode. The default setting is empty, when use AES equal to `AES_128_ECB`, when use SM4 equal to `SM3_128_ECB` - available values: -``` - AES_128_ECB, - AES_192_ECB, - AES_256_ECB, - AES_128_CBC, - AES_192_CBC, - AES_256_CBC, - AES_128_CFB, - AES_192_CFB, - AES_256_CFB, - AES_128_CFB1, - AES_192_CFB1, - AES_256_CFB1, - AES_128_CFB8, - AES_192_CFB8, - AES_256_CFB8, - AES_128_CFB128, - AES_192_CFB128, - AES_256_CFB128, - AES_128_CTR, - AES_192_CTR, - AES_256_CTR, - AES_128_OFB, - AES_192_OFB, - AES_256_OFB, - SM4_128_ECB, - SM4_128_CBC, - SM4_128_CFB128, - SM4_128_OFB, - SM4_128_CTR, -``` \ No newline at end of file diff --git a/new-docs/en/advanced/alter-table/replace-table.md b/docs/en/advanced/alter-table/replace-table.md similarity index 100% rename from new-docs/en/advanced/alter-table/replace-table.md rename to docs/en/advanced/alter-table/replace-table.md diff --git a/new-docs/en/advanced/alter-table/schema-change.md b/docs/en/advanced/alter-table/schema-change.md similarity index 100% rename from new-docs/en/advanced/alter-table/schema-change.md rename to docs/en/advanced/alter-table/schema-change.md diff --git a/new-docs/en/advanced/best-practice/debug-log.md b/docs/en/advanced/best-practice/debug-log.md similarity index 100% rename from new-docs/en/advanced/best-practice/debug-log.md rename to docs/en/advanced/best-practice/debug-log.md diff --git a/new-docs/en/advanced/best-practice/import-analysis.md b/docs/en/advanced/best-practice/import-analysis.md similarity index 100% rename from new-docs/en/advanced/best-practice/import-analysis.md rename to docs/en/advanced/best-practice/import-analysis.md diff --git a/new-docs/en/advanced/best-practice/query-analysis.md b/docs/en/advanced/best-practice/query-analysis.md similarity index 100% rename from new-docs/en/advanced/best-practice/query-analysis.md rename to docs/en/advanced/best-practice/query-analysis.md diff --git a/new-docs/en/advanced/broker.md b/docs/en/advanced/broker.md similarity index 100% rename from new-docs/en/advanced/broker.md rename to docs/en/advanced/broker.md diff --git a/new-docs/en/advanced/cache/partition-cache.md b/docs/en/advanced/cache/partition-cache.md similarity index 100% rename from new-docs/en/advanced/cache/partition-cache.md rename to docs/en/advanced/cache/partition-cache.md diff --git a/new-docs/en/advanced/cache/query-cache.md b/docs/en/advanced/cache/query-cache.md similarity index 100% rename from new-docs/en/advanced/cache/query-cache.md rename to docs/en/advanced/cache/query-cache.md diff --git a/new-docs/en/advanced/join-optimization/bucket-shuffle-join.md b/docs/en/advanced/join-optimization/bucket-shuffle-join.md similarity index 100% rename from new-docs/en/advanced/join-optimization/bucket-shuffle-join.md rename to docs/en/advanced/join-optimization/bucket-shuffle-join.md diff --git a/new-docs/en/advanced/join-optimization/colocation-join.md b/docs/en/advanced/join-optimization/colocation-join.md similarity index 100% rename from new-docs/en/advanced/join-optimization/colocation-join.md rename to docs/en/advanced/join-optimization/colocation-join.md diff --git a/new-docs/en/advanced/join-optimization/runtime-filter.md b/docs/en/advanced/join-optimization/runtime-filter.md similarity index 100% rename from 
new-docs/en/advanced/join-optimization/runtime-filter.md rename to docs/en/advanced/join-optimization/runtime-filter.md diff --git a/new-docs/en/advanced/materialized-view.md b/docs/en/advanced/materialized-view.md similarity index 100% rename from new-docs/en/advanced/materialized-view.md rename to docs/en/advanced/materialized-view.md diff --git a/docs/en/administrator-guide/orthogonal-bitmap-manual.md b/docs/en/advanced/orthogonal-bitmap-manual.md similarity index 100% rename from docs/en/administrator-guide/orthogonal-bitmap-manual.md rename to docs/en/advanced/orthogonal-bitmap-manual.md diff --git a/new-docs/en/advanced/orthogonal-hll-manual.md b/docs/en/advanced/orthogonal-hll-manual.md similarity index 100% rename from new-docs/en/advanced/orthogonal-hll-manual.md rename to docs/en/advanced/orthogonal-hll-manual.md diff --git a/new-docs/en/advanced/partition/dynamic-partition.md b/docs/en/advanced/partition/dynamic-partition.md similarity index 100% rename from new-docs/en/advanced/partition/dynamic-partition.md rename to docs/en/advanced/partition/dynamic-partition.md diff --git a/new-docs/en/advanced/partition/table-temp-partition.md b/docs/en/advanced/partition/table-temp-partition.md similarity index 100% rename from new-docs/en/advanced/partition/table-temp-partition.md rename to docs/en/advanced/partition/table-temp-partition.md diff --git a/new-docs/en/advanced/resource.md b/docs/en/advanced/resource.md similarity index 100% rename from new-docs/en/advanced/resource.md rename to docs/en/advanced/resource.md diff --git a/new-docs/en/advanced/small-file-mgr.md b/docs/en/advanced/small-file-mgr.md similarity index 100% rename from new-docs/en/advanced/small-file-mgr.md rename to docs/en/advanced/small-file-mgr.md diff --git a/new-docs/en/advanced/time-zone.md b/docs/en/advanced/time-zone.md similarity index 100% rename from new-docs/en/advanced/time-zone.md rename to docs/en/advanced/time-zone.md diff --git a/new-docs/en/advanced/variables.md b/docs/en/advanced/variables.md similarity index 100% rename from new-docs/en/advanced/variables.md rename to docs/en/advanced/variables.md diff --git a/docs/en/administrator-guide/vectorized-execution-engine.md b/docs/en/advanced/vectorized-execution-engine.md similarity index 100% rename from docs/en/administrator-guide/vectorized-execution-engine.md rename to docs/en/advanced/vectorized-execution-engine.md diff --git a/docs/en/benchmark/samples.md b/docs/en/benchmark/samples.md deleted file mode 100644 index 309808c806..0000000000 --- a/docs/en/benchmark/samples.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -{ - "title": "Samples", - "language": "en" -} ---- - - - -# Samples - -Doris provides a wealth of usage samples, which can help Doris users quickly get started to experience the features of Doris. - -## Description - -The sample codes are stored in the [`samples/`](https://github.com/apache/incubator-doris/tree/master/samples) directory of the Doris code base. - -``` -├── connect -├── doris-demo -├── insert -└── mini_load -``` - -* `connect/` - - This catalog mainly shows the code examples of connecting Doris in various programming languages. - -* `doris-demo/` - - The code examples of the multiple functions of Doris are shown mainly in the form of Maven project. Such as spark-connector and flink-connector usage examples, integration with the Spring framework, Stream Load examples, and so on. - -* `insert/` - - This catalog shows some code examples of importing data through python or shell script calling Doris's Insert command. 
- -* `miniload/` - - This catalog shows the code example of calling mini load through python to import data. However, because the mini load function has been replaced by the stream load function, it is recommended to use the stream load function for data import. \ No newline at end of file diff --git a/new-docs/en/benchmark/ssb.md b/docs/en/benchmark/ssb.md similarity index 100% rename from new-docs/en/benchmark/ssb.md rename to docs/en/benchmark/ssb.md diff --git a/docs/en/benchmark/star-schema-benchmark.md b/docs/en/benchmark/star-schema-benchmark.md deleted file mode 100644 index da93925528..0000000000 --- a/docs/en/benchmark/star-schema-benchmark.md +++ /dev/null @@ -1,181 +0,0 @@ ---- -{ - "title": "Star-Schema-Benchmark", - "language": "en" -} ---- - - - -# Star Schema Benchmark - -[Star Schema Benchmark(SSB)](https://www.cs.umb.edu/~poneil/StarSchemaB.PDF) is a lightweight data warehouse scenario performance test set. Based on [TPC-H](http://www.tpc.org/tpch/), SSB provides a simplified version of the star model data set, which is mainly used to test the performance of multi-table association queries under the star model. - -This document mainly introduces how to pass the preliminary performance test of the SSB process in Doris. - -> Note 1: The standard test set including SSB is usually far from the actual business scenario, and some tests will perform parameter tuning for the test set. Therefore, the test results of the standard test set can only reflect the performance of the database in a specific scenario. It is recommended that users use actual business data for further testing. -> -> Note 2: The operations involved in this document are all performed in the CentOS 7 environment. - -## Environmental preparation - -Please refer to the [official document](http://doris.incubator.apache.org/master/en/installing/install-deploy.html) to install and deploy Doris to obtain a normal running Doris cluster ( Contain at least 1 FE, 1 BE). - -The scripts involved in the following documents are all stored under `tools/ssb-tools/` in the Doris code base. - -## data preparation - -### 1. Download and install the SSB data generation tool. - -Execute the following script to download and compile the [ssb-dbgen](https://github.com/electrum/ssb-dbgen.git) tool. - -``` -sh build-ssb-dbgen.sh -``` - -After the installation is successful, the `dbgen` binary file will be generated in the `ssb-dbgen/` directory. - -### 2. Generate SSB test set - -Execute the following script to generate the SSB data set: - -``` -sh gen-ssb-data.sh -s 100 -c 100 -``` - -> Note 1: Run `sh gen-ssb-data.sh -h` for help. -> -> Note 2: The data will be generated under the directory `ssb-data/` with a suffix of `.tbl`. The total file size is about 60GB. The generation time may vary from a few minutes to an hour. -> -> Note 3: `-s 100` means that the test set size factor is 100, `-c 100` means that 100 threads concurrently generate data in the lineorder table. The `-c` parameter also determines the number of files in the final lineorder table. The larger the parameter, the more files and the smaller each file. - -Under the `-s 100` parameter, the generated data set size is: - -|Table |Rows |Size | File Number | -|---|---|---|---| -|lineorder| 600 million (600037902) | 60GB | 100| -|customer|3 million (3000000) |277M |1| -|part|1.4 million (1400000) | 116M|1| -|supplier|200,000 (200,000) |17M |1| -|date| 2556|228K |1| - -3. 
Build a table - - Copy the table creation statement in [create-tables.sql](https://github.com/apache/incubator-doris/tree/master/tools/ssb-tools/create-tables.sql) and execute it in Doris. - -4. Import data - - 0. Prepare the 'doris-cluster.conf' file. - - Before calling the load script, you need to write the FE's ip port and other information in the `doris-cluster.conf` file. - - 'doris-cluster.conf' in the same directory as `load-dimension-data.sh`. - - The contents of the file include FE's ip, HTTP port, user name, password and the DB name of the data to be loaded: - - ```` - export FE_HOST="xxx" - export FE_HTTP_PORT="8030" - export USER="root" - export PASSWORD='xxx' - export DB="ssb" - ```` - - 1. Load 4 dimension table data (customer, part, supplier and date) - - Because the data volume of these 4 dimension tables is small, and the load is simpler, we use the following command to load the data of these 4 tables first: - - `sh load-dimension-data.sh` - - 2. Load the fact table lineorder. - - Load the lineorder table data with the following command: - - `sh load-fact-data.sh -c 5` - - `-c 5` means to start 5 concurrent threads to import (the default is 3). In the case of a single BE node, the load time of lineorder data generated by `sh gen-ssb-data.sh -s 100 -c 100` using `sh load-fact-data.sh -c 3` is about 10 minutes. The memory overhead is about 5-6GB. If you turn on more threads, you can speed up the load speed, but it will increase additional memory overhead. - - > Note: To get a faster import speed, you can add `flush_thread_num_per_store=5` in be.conf and restart BE. This configuration indicates the number of disk write threads for each data directory, and the default is 2. Larger data can increase write data throughput, but may increase IO Util. (Reference value: 1 mechanical disk, when the default is 2, the IO Util during the import process is about 12%, when it is set to 5, the IO Util is about 26%. If it is an SSD disk, it is almost 0) . - -5. Check the loaded data - - ``` - select count(*) from part; - select count(*) from customer; - select count(*) from supplier; - select count(*) from date; - select count(*) from lineorder; - ``` - - The amount of data should be the same as the number of rows of generated data. - -## Query test - -There are 4 groups of 14 SQL in the SSB test set. The query statement is in the [queries/](https://github.com/apache/incubator-doris/tree/master/tools/ssb-tools/queries) directory. - -## testing report - -The following test report is based on Doris [branch-0.15](https://github.com/apache/incubator-doris/tree/branch-0.15) branch code test, for reference only. (Update time: October 25, 2021) - -1. Hardware environment - - * 1 FE + 1-3 BE mixed - * CPU: 96core, Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz - * Memory: 384GB - * Hard disk: 1 HDD - * Network card: 10 Gigabit network card - -2. Data set - - |Table |Rows |Origin Size | Compacted Size(1 Replica) | - |---|---|---|---| - |lineorder| 600 million (600037902) | 60 GB | 14.846 GB | - |customer|3 million (3000000) |277 MB | 414.741 MB | - |part|1.4 million (1.400000) | 116 MB | 38.277 MB | - |supplier|200,000 (200,000) |17 MB | 27.428 MB | - |date| 2556|228 KB | 275.804 KB | - -3. 
Test results - - |Query |Time(ms) (1 BE) | Time(ms) (3 BE) | Parallelism | Runtime Filter Mode | - |---|---|---|---|---| - | q1.1 | 200 | 140 | 8 | IN | - | q1.2 | 90 | 80 | 8 | IN | - | q1.3 | 90 | 80 | 8 | IN | - | q2.1 | 1100 | 400 | 8 | BLOOM_FILTER | - | q2.2 | 900 | 330 | 8 | BLOOM_FILTER | - | q2.3 | 790 | 320 | 8 | BLOOM_FILTER | - | q3.1 | 3100 | 1280 | 8 | BLOOM_FILTER | - | q3.2 | 700 | 270 | 8 | BLOOM_FILTER | - | q3.3 | 540 | 270 | 8 | BLOOM_FILTER | - | q3.4 | 560 | 240 | 8 | BLOOM_FILTER | - | q4.1 | 2820 | 1150 | 8 | BLOOM_FILTER | - | q4.2 | 1430 | 670 | 8 | BLOOM_FILTER | - | q4.3 | 1750 | 1030 | 8 | BLOOM_FILTER | - - > Note 1: "This test set is far from your production environment, please be skeptical!" - > - > Note 2: The test result is the average value of multiple executions (Page Cache will play a certain acceleration role), and the data has undergone sufficient compaction (if you test immediately after importing the data, the query delay may be higher than the test result). - > - > Note 3: Due to environmental constraints, the hardware specifications used in this test are relatively high, but so many hardware resources will not be consumed during the entire test. The memory consumption is within 10GB, and the CPU usage is within 10%. - > - > Note 4: Parallelism means query concurrency, which is set by `set parallel_fragment_exec_instance_num=8`. - > - > Note 5: Runtime Filter Mode is the type of Runtime Filter, set by `set runtime_filter_type="BLOOM_FILTER"`. (The [Runtime Filter](http://doris.incubator.apache.org/master/en/administrator-guide/runtime-filter.html) function has a significant effect on the SSB test set, because in this test set the data from the right table of the Join can filter the left table very well. You can try to turn off this function through `set runtime_filter_mode=off` to see the change in query latency.) diff --git a/docs/en/benchmark/systemd.md b/docs/en/benchmark/systemd.md deleted file mode 100644 index 5180091b97..0000000000 --- a/docs/en/benchmark/systemd.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -{ - "title": "Systemd", - "language": "en" -} ---- - - - -# Systemd - -The Systemd configuration file is provided in the Doris code base, which can help users control the start and stop of the Doris service in Linux. - -Please go to [Code Base](https://github.com/apache/incubator-doris/tree/master/tools/systemd) to view the configuration file. diff --git a/new-docs/en/benchmark/tpc-h.md b/docs/en/benchmark/tpc-h.md similarity index 100% rename from new-docs/en/benchmark/tpc-h.md rename to docs/en/benchmark/tpc-h.md diff --git a/docs/en/community/how-to-contribute/commit-format-specification.md b/docs/en/community/how-to-contribute/commit-format-specification.md index 3b9034f106..da4fb59203 100644 --- a/docs/en/community/how-to-contribute/commit-format-specification.md +++ b/docs/en/community/how-to-contribute/commit-format-specification.md @@ -53,7 +53,7 @@ Commit is divided into ‘ title ’ and ‘ content ’ , the title should be l * deps: Modification of third-party dependency Library * community: Such as modification of Github issue template. - Some tips: + Some tips: 1. If there are multiple types in one commit, multiple types need to be added 2. If code refactoring brings performance improvement, [refactor][optimize] can be added at the same time @@ -80,7 +80,7 @@ Commit is divided into ‘ title ’ and ‘ content ’ , the title should be l * config * docs - Some tips: + Some tips: 1. Try to use options that already exist in the list.
If you need to add, please update this document in time @@ -93,7 +93,7 @@ Commit is divided into ‘ title ’ and ‘ content ’ , the title should be l commit message should follow the following format: ``` - issue: #7777 + issue:#7777 your message ``` diff --git a/docs/en/community/release-and-verify/release-complete.md b/docs/en/community/release-and-verify/release-complete.md index 7abfec5727..e9db21ce84 100644 --- a/docs/en/community/release-and-verify/release-complete.md +++ b/docs/en/community/release-and-verify/release-complete.md @@ -44,10 +44,10 @@ https://dist.apache.org/repos/dist/release/incubator/doris/ For the first release, you need to copy the KEYS file as well. Then add it to the svn release. ``` -After add succeeds, you can see the files you published on the following website +After the add succeeds, you can see the published files at the following URL: https://dist.apache.org/repos/dist/release/incubator/doris/0.xx.0-incubating/ -After a while, you can see on the official website of Apache: +After a while, you can see it on the official Apache website: http://www.apache.org/dist/incubator/doris/0.9.0-incubating/ ``` @@ -150,7 +150,7 @@ Title: [ANNOUNCE] Apache Doris (incubating) 0.9.0 Release ``` -To mail: +To mail: ``` dev@doris.apache.org diff --git a/new-docs/en/data-operate/export/export-manual.md b/docs/en/data-operate/export/export-manual.md similarity index 100% rename from new-docs/en/data-operate/export/export-manual.md rename to docs/en/data-operate/export/export-manual.md diff --git a/docs/en/administrator-guide/export_with_mysql_dump.md b/docs/en/data-operate/export/export_with_mysql_dump.md similarity index 100% rename from docs/en/administrator-guide/export_with_mysql_dump.md rename to docs/en/data-operate/export/export_with_mysql_dump.md diff --git a/new-docs/en/data-operate/export/outfile.md b/docs/en/data-operate/export/outfile.md similarity index 100% rename from new-docs/en/data-operate/export/outfile.md rename to docs/en/data-operate/export/outfile.md diff --git a/new-docs/en/data-operate/import/import-scenes/external-storage-load.md b/docs/en/data-operate/import/import-scenes/external-storage-load.md similarity index 100% rename from new-docs/en/data-operate/import/import-scenes/external-storage-load.md rename to docs/en/data-operate/import/import-scenes/external-storage-load.md diff --git a/new-docs/en/data-operate/import/import-scenes/external-table-load.md b/docs/en/data-operate/import/import-scenes/external-table-load.md similarity index 100% rename from new-docs/en/data-operate/import/import-scenes/external-table-load.md rename to docs/en/data-operate/import/import-scenes/external-table-load.md diff --git a/new-docs/en/data-operate/import/import-scenes/jdbc-load.md b/docs/en/data-operate/import/import-scenes/jdbc-load.md similarity index 100% rename from new-docs/en/data-operate/import/import-scenes/jdbc-load.md rename to docs/en/data-operate/import/import-scenes/jdbc-load.md diff --git a/new-docs/en/data-operate/import/import-scenes/kafka-load.md b/docs/en/data-operate/import/import-scenes/kafka-load.md similarity index 100% rename from new-docs/en/data-operate/import/import-scenes/kafka-load.md rename to docs/en/data-operate/import/import-scenes/kafka-load.md diff --git a/new-docs/en/data-operate/import/import-scenes/load-atomicity.md b/docs/en/data-operate/import/import-scenes/load-atomicity.md similarity index 100% rename from new-docs/en/data-operate/import/import-scenes/load-atomicity.md rename to docs/en/data-operate/import/import-scenes/load-atomicity.md diff --git
a/new-docs/en/data-operate/import/import-scenes/load-data-convert.md b/docs/en/data-operate/import/import-scenes/load-data-convert.md similarity index 100% rename from new-docs/en/data-operate/import/import-scenes/load-data-convert.md rename to docs/en/data-operate/import/import-scenes/load-data-convert.md diff --git a/new-docs/en/data-operate/import/import-scenes/load-strict-mode.md b/docs/en/data-operate/import/import-scenes/load-strict-mode.md similarity index 100% rename from new-docs/en/data-operate/import/import-scenes/load-strict-mode.md rename to docs/en/data-operate/import/import-scenes/load-strict-mode.md diff --git a/new-docs/en/data-operate/import/import-scenes/local-file-load.md b/docs/en/data-operate/import/import-scenes/local-file-load.md similarity index 100% rename from new-docs/en/data-operate/import/import-scenes/local-file-load.md rename to docs/en/data-operate/import/import-scenes/local-file-load.md diff --git a/new-docs/en/data-operate/import/import-way/binlog-load-manual.md b/docs/en/data-operate/import/import-way/binlog-load-manual.md similarity index 100% rename from new-docs/en/data-operate/import/import-way/binlog-load-manual.md rename to docs/en/data-operate/import/import-way/binlog-load-manual.md diff --git a/new-docs/en/data-operate/import/import-way/broker-load-manual.md b/docs/en/data-operate/import/import-way/broker-load-manual.md similarity index 100% rename from new-docs/en/data-operate/import/import-way/broker-load-manual.md rename to docs/en/data-operate/import/import-way/broker-load-manual.md diff --git a/new-docs/en/data-operate/import/import-way/insert-into-manual.md b/docs/en/data-operate/import/import-way/insert-into-manual.md similarity index 100% rename from new-docs/en/data-operate/import/import-way/insert-into-manual.md rename to docs/en/data-operate/import/import-way/insert-into-manual.md diff --git a/new-docs/en/data-operate/import/import-way/load-json-format.md b/docs/en/data-operate/import/import-way/load-json-format.md similarity index 100% rename from new-docs/en/data-operate/import/import-way/load-json-format.md rename to docs/en/data-operate/import/import-way/load-json-format.md diff --git a/new-docs/en/data-operate/import/import-way/routine-load-manual.md b/docs/en/data-operate/import/import-way/routine-load-manual.md similarity index 100% rename from new-docs/en/data-operate/import/import-way/routine-load-manual.md rename to docs/en/data-operate/import/import-way/routine-load-manual.md diff --git a/new-docs/en/data-operate/import/import-way/s3-load-manual.md b/docs/en/data-operate/import/import-way/s3-load-manual.md similarity index 100% rename from new-docs/en/data-operate/import/import-way/s3-load-manual.md rename to docs/en/data-operate/import/import-way/s3-load-manual.md diff --git a/new-docs/en/data-operate/import/import-way/spark-load-manual.md b/docs/en/data-operate/import/import-way/spark-load-manual.md similarity index 100% rename from new-docs/en/data-operate/import/import-way/spark-load-manual.md rename to docs/en/data-operate/import/import-way/spark-load-manual.md diff --git a/new-docs/en/data-operate/import/import-way/stream-load-manual.md b/docs/en/data-operate/import/import-way/stream-load-manual.md similarity index 100% rename from new-docs/en/data-operate/import/import-way/stream-load-manual.md rename to docs/en/data-operate/import/import-way/stream-load-manual.md diff --git a/new-docs/en/data-operate/import/load-manual.md b/docs/en/data-operate/import/load-manual.md similarity index 100% rename from 
new-docs/en/data-operate/import/load-manual.md rename to docs/en/data-operate/import/load-manual.md diff --git a/new-docs/en/data-operate/update-delete/batch-delete-manual.md b/docs/en/data-operate/update-delete/batch-delete-manual.md similarity index 100% rename from new-docs/en/data-operate/update-delete/batch-delete-manual.md rename to docs/en/data-operate/update-delete/batch-delete-manual.md diff --git a/new-docs/en/data-operate/update-delete/delete-manual.md b/docs/en/data-operate/update-delete/delete-manual.md similarity index 100% rename from new-docs/en/data-operate/update-delete/delete-manual.md rename to docs/en/data-operate/update-delete/delete-manual.md diff --git a/new-docs/en/data-operate/update-delete/sequence-column-manual.md b/docs/en/data-operate/update-delete/sequence-column-manual.md similarity index 100% rename from new-docs/en/data-operate/update-delete/sequence-column-manual.md rename to docs/en/data-operate/update-delete/sequence-column-manual.md diff --git a/new-docs/en/data-operate/update-delete/update.md b/docs/en/data-operate/update-delete/update.md similarity index 100% rename from new-docs/en/data-operate/update-delete/update.md rename to docs/en/data-operate/update-delete/update.md diff --git a/new-docs/en/data-table/advance-usage.md b/docs/en/data-table/advance-usage.md similarity index 100% rename from new-docs/en/data-table/advance-usage.md rename to docs/en/data-table/advance-usage.md diff --git a/new-docs/en/data-table/basic-usage.md b/docs/en/data-table/basic-usage.md similarity index 100% rename from new-docs/en/data-table/basic-usage.md rename to docs/en/data-table/basic-usage.md diff --git a/new-docs/en/data-table/best-practice.md b/docs/en/data-table/best-practice.md similarity index 100% rename from new-docs/en/data-table/best-practice.md rename to docs/en/data-table/best-practice.md diff --git a/new-docs/en/data-table/data-model.md b/docs/en/data-table/data-model.md similarity index 100% rename from new-docs/en/data-table/data-model.md rename to docs/en/data-table/data-model.md diff --git a/new-docs/en/data-table/data-partition.md b/docs/en/data-table/data-partition.md similarity index 100% rename from new-docs/en/data-table/data-partition.md rename to docs/en/data-table/data-partition.md diff --git a/new-docs/en/data-table/hit-the-rollup.md b/docs/en/data-table/hit-the-rollup.md similarity index 100% rename from new-docs/en/data-table/hit-the-rollup.md rename to docs/en/data-table/hit-the-rollup.md diff --git a/new-docs/en/data-table/index/bitmap-index.md b/docs/en/data-table/index/bitmap-index.md similarity index 100% rename from new-docs/en/data-table/index/bitmap-index.md rename to docs/en/data-table/index/bitmap-index.md diff --git a/docs/en/administrator-guide/bloomfilter.md b/docs/en/data-table/index/bloomfilter.md similarity index 100% rename from docs/en/administrator-guide/bloomfilter.md rename to docs/en/data-table/index/bloomfilter.md diff --git a/new-docs/en/data-table/index/prefix-index.md b/docs/en/data-table/index/prefix-index.md similarity index 100% rename from new-docs/en/data-table/index/prefix-index.md rename to docs/en/data-table/index/prefix-index.md diff --git a/docs/en/developer-guide/be-vscode-dev.md b/docs/en/developer-guide/be-vscode-dev.md index 86c3c7f452..612f6f8710 100644 --- a/docs/en/developer-guide/be-vscode-dev.md +++ b/docs/en/developer-guide/be-vscode-dev.md @@ -32,7 +32,7 @@ under the License. 1. 
Download the doris source code - URL: [apache/incubator-doris: Apache Doris (Incubating) (github.com)](https://github.com/apache/incubator-doris) + URL:[apache/incubator-doris: Apache Doris (Incubating) (github.com)](https://github.com/apache/incubator-doris) 2. Install GCC 8.3.1+, Oracle JDK 1.8+, Python 2.7+, confirm that the gcc, java, python commands point to the correct version, and set the JAVA_HOME environment variable @@ -132,7 +132,7 @@ Need to create this folder, this is where the be data is stored mkdir -p /soft/be/storage ``` -3. Open vscode, and open the directory where the be source code is located. In this case, open the directory as **/home/workspace/incubator-doris/**,For details on how to vscode, refer to the online tutorial +3. Open vscode, and open the directory where the be source code is located. In this case, open the directory as **/home/workspace/incubator-doris/**,For details on how to vscode, refer to the online tutorial 4. Install the vscode ms c++ debugging plug-in, the plug-in identified by the red box in the figure below diff --git a/docs/en/developer-guide/benchmark-tool.md b/docs/en/developer-guide/benchmark-tool.md index 74b1ce3da1..536881d7d4 100644 --- a/docs/en/developer-guide/benchmark-tool.md +++ b/docs/en/developer-guide/benchmark-tool.md @@ -33,7 +33,7 @@ It can be used to test the performance of some parts of the BE storage layer (fo ## Compilation -1. To ensure that the environment has been able to successfully compile the Doris ontology, you can refer to [Installation and deployment] (https://doris.apache.org/master/en/installing/compilation.html). +1. To ensure that the environment has been able to successfully compile the Doris ontology, you can refer to [Installation and deployment] (https://doris.apache.org/master/en/installing/compilation.html)。 2. Execute`run-be-ut.sh` @@ -53,9 +53,9 @@ The data set is generated according to the following rules. >int: Random in [1,1000000]. The data character set of string type is uppercase and lowercase English letters, and the length varies according to the type. -> char: Length random in [1,8]. -> varchar: Length random in [1,128]. -> string: Length random in [1,100000]. +> char: Length random in [1,8]。 +> varchar: Length random in [1,128]。 +> string: Length random in [1,100000]。 `rows_number` indicates the number of rows of data, the default value is `10000`. diff --git a/docs/en/developer-guide/cpp-diagnostic-code.md b/docs/en/developer-guide/cpp-diagnostic-code.md index 642ce2595c..dd172d8206 100644 --- a/docs/en/developer-guide/cpp-diagnostic-code.md +++ b/docs/en/developer-guide/cpp-diagnostic-code.md @@ -26,7 +26,7 @@ under the License. # C++ Code Diagnostic -Doris support to use [Clangd](https://clangd.llvm.org/) and [Clang-Tidy](https://clang.llvm.org/extra/clang-tidy/) to diagnostic code. Clangd and Clang-Tidy already has in [LDB-toolchain](https://doris.apache.org/zh-CN/installing/compilation-with-ldb-toolchain),also can install by self. +Doris support to use [Clangd](https://clangd.llvm.org/) and [Clang-Tidy](https://clang.llvm.org/extra/clang-tidy/) to diagnostic code. Clangd and Clang-Tidy already has in [LDB-toolchain](https://doris.apache.org/zh-CN/installing/compilation-with-ldb-toolchain),also can install by self. ### Clang-Tidy Clang-Tidy can do some diagnostic cofig, config file `.clang-tidy` is in Doris root path. Compared with vscode-cpptools, clangd can provide more powerful and accurate code jumping for vscode, and integrates the analysis and quick-fix functions of clang-tidy. 
diff --git a/docs/en/developer-guide/fe-idea-dev.md b/docs/en/developer-guide/fe-idea-dev.md index afc90a0635..4146046a4b 100644 --- a/docs/en/developer-guide/fe-idea-dev.md +++ b/docs/en/developer-guide/fe-idea-dev.md @@ -46,16 +46,16 @@ under the License. Doris build against `thrift` 0.13.0 ( note : `Doris` 0.15 and later version build against `thrift` 0.13.0 , the previous version is still `thrift` 0.9.3) Windows: - 1. Download: `http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe` - 2. Copy: copy the file to `./thirdparty/installed/bin` + 1. Download:`http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe` + 2. Copy:copy the file to `./thirdparty/installed/bin` MacOS: - 1. Download: `brew install thrift@0.13.0` - 2. Establish soft connection: + 1. Download:`brew install thrift@0.13.0` + 2. Establish soft connection: `mkdir -p ./thirdparty/installed/bin` `ln -s /opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift ./thirdparty/installed/bin/thrift` - Note: The error that the version cannot be found may be reported when MacOS execute `brew install thrift@0.13.0`. The solution is execute at the terminal as follows: + Note:The error that the version cannot be found may be reported when MacOS execute `brew install thrift@0.13.0`. The solution is execute at the terminal as follows: 1. `brew tap-new $USER/local-tap` 2. `brew extract --version='0.13.0' thrift $USER/local-tap` 3. `brew install thrift@0.13.0` diff --git a/docs/en/developer-guide/fe-vscode-dev.md b/docs/en/developer-guide/fe-vscode-dev.md index e839449a7f..e90fc05269 100644 --- a/docs/en/developer-guide/fe-vscode-dev.md +++ b/docs/en/developer-guide/fe-vscode-dev.md @@ -47,7 +47,7 @@ Create `settings.json` in `.vscode/` , and set settings: * `"java.configuration.runtimes"` * `"java.jdt.ls.java.home"` -- must set it to the directory of JDK11+, used for vscode-java plugin -* `"maven.executable.path"` -- maven path,for maven-language-server plugin +* `"maven.executable.path"` -- maven path,for maven-language-server plugin example: diff --git a/docs/en/extending-doris/audit-plugin.md b/docs/en/ecosystem/audit-plugin.md similarity index 100% rename from docs/en/extending-doris/audit-plugin.md rename to docs/en/ecosystem/audit-plugin.md diff --git a/new-docs/en/ecosystem/datax.md b/docs/en/ecosystem/datax.md similarity index 100% rename from new-docs/en/ecosystem/datax.md rename to docs/en/ecosystem/datax.md diff --git a/docs/en/ecosystem/doris-manager/cluster-managenent.md b/docs/en/ecosystem/doris-manager/cluster-managenent.md new file mode 100644 index 0000000000..5315294005 --- /dev/null +++ b/docs/en/ecosystem/doris-manager/cluster-managenent.md @@ -0,0 +1,69 @@ +--- +{ + "title": "Cluster management", + "language": "en" +} +--- + + + +# Cluster management + +The super administrator and space administrator can mainly perform the following operations under the cluster module: + +- View cluster overview +- View node list +- Edit parameter configuration + +## Cluster overview + +### View basic cluster information + +Cluster function, showing a cluster-based monitoring panel. + +On the home page, click "Cluster" in the navigation bar to enter the cluster function. + +![](/images/doris-manager/iclustermanager-1.png) + +The operation and maintenance monitoring panel provides various performance monitoring indicators of the cluster for users to gain insight into the cluster status. Users can control the start and stop operations of the cluster through buttons in the upper right corner. 
+ +### View cluster resource usage + +Users can view disk usage through pie charts, and view the number of databases, etc. + +## Node list + +Displays information about FE nodes, BE nodes, and brokers in the cluster. +Provides fields including Node ID, Node Type, Host IP, and Node Status. + +![](/images/doris-manager/iclustermanager-2.png) + +## Parameter configuration + +Parameter configuration provides parameter name, parameter type, parameter value type, thermal effect and operation fields. + +![](/images/doris-manager/iclustermanager-3.png) + +- **Operation**: Click the "Edit" button, you can edit and modify the corresponding configuration value, you can choose the corresponding effective method; click the "View current value" button, you can view the current value corresponding to the host IP + +![](/images/doris-manager/iclustermanager-4.png) + +![](/images/doris-manager/iclustermanager-5.png) + diff --git a/docs/en/ecosystem/doris-manager/compiling-deploying.md b/docs/en/ecosystem/doris-manager/compiling-deploying.md new file mode 100644 index 0000000000..1062cfd728 --- /dev/null +++ b/docs/en/ecosystem/doris-manager/compiling-deploying.md @@ -0,0 +1,112 @@ +--- +{ + "title": "Compile and deploy", + "language": "en" +} +--- + + + +# Compile and deploy + +## Compile + +Running the build.sh script under the manager path directly will generate the installation and running package -- output under the manager path, including: +1. Doris Manager's running package doris-manager.jar +2. The running configuration folder conf +3. Start the script start_manager.sh +4. Stop the script stop_manager.sh + +## Run + +### 1 Configuration + +Enter the generated installation and running package, view the configuration file conf path, and open the configuration file manager.conf in the path. The configuration items to focus on are as follows: + +````$xslt +The service's startup http port +STUDIO_PORT=8080 + +The type of database where the backend data is stored, including mysql/h2/postgresql. The default is to support mysql +MB_DB_TYPE=mysql + +Database connection information +If it is a configured h2 type database, you do not need to configure this information, and the data will be stored locally as a local file +h2 data file storage path, directly stored in the current path by default +H2_FILE_PATH= + +If it is mysql/postgresql, you need to configure the following connection information +database address +MB_DB_HOST= + +database port +MB_DB_PORT=3306 + +database access port +MB_DB_USER= + +Database access password +MB_DB_PASS= + +database name of the database +MB_DB_DBNAME= + +The path where the service runs, which is directly stored in the log folder of the current running path by default. +LOG_PATH= + +The length of the waiting queue of the web container, the default is 100. The queue is also used as a buffer pool, but it cannot be infinitely long. It not only consumes memory, but also consumes CPU when entering the queue. +WEB_ACCEPT_COUNT=100 + +The maximum number of worker threads for the web container, 200 by default. (usually the number of CPU cores * 200) +WEB_MAX_THREADS=200 + +The minimum number of working idle threads for the web container, the default is 10. (Appropriately increase some to cope with the sudden increase in traffic) +WEB_MIN_SPARE_THREADS=10 + +The maximum number of connections for the web container, the default is 10000. 
(Appropriately increase some to cope with the sudden increase in traffic) +WEB_MAX_CONNECTIONS=10000 + +The maximum number of connections to access the database connection pool, the default is 10 +DB_MAX_POOL_SIZE=20 + +The minimum number of idle connections to access the database connection pool, the default is 10 +DB_MIN_IDLE=10 +```` + +### 2 Start + +After the configuration modification is completed, go back to the installation and run package and run the following command directly + +````$xslt +nohup sh ./start_manager.sh > start.log 2>&1 & +```` + +Check the logs in the logs to determine whether the program started successfully + +### 3 Use + +Doris Manager presets a super administrator user with the following information: + +````$xslt +Username: Admin +Password: Admin@123 +```` + +To ensure safe use, please change your password after logging in! diff --git a/docs/en/ecosystem/doris-manager/initializing.md b/docs/en/ecosystem/doris-manager/initializing.md new file mode 100644 index 0000000000..3c867d81c6 --- /dev/null +++ b/docs/en/ecosystem/doris-manager/initializing.md @@ -0,0 +1,43 @@ +--- +{ + "title": "Initialize", + "language": "en" +} +--- + + + +# Initialize + +After the deployment is complete, the super administrator needs to complete the local initialization. + +## Manage users + +The first step of initialization is to manage users, which mainly completes the selection and configuration of authentication methods. Currently Doris Manger supports local user authentication. + +![](/images/doris-manager/initializing-1.png) + +### Local user authentication + +Local user authentication is the user system that comes with Doris Manger. User registration can be completed by filling in the user name, email address and password. User addition, information modification, deletion and permission relationship are all completed locally. + +![](/images/doris-manager/initializing-2.png) + +At this point, the local initialization process has been completed. Super administrators can create spaces, space administrators can enter the space, manage the space, add and invite users to enter the space for data analysis, etc. \ No newline at end of file diff --git a/docs/en/ecosystem/doris-manager/space-list.md b/docs/en/ecosystem/doris-manager/space-list.md new file mode 100644 index 0000000000..543cb80973 --- /dev/null +++ b/docs/en/ecosystem/doris-manager/space-list.md @@ -0,0 +1,234 @@ +--- +{ + "title": "Space list", + "language": "en" +} +--- + + + +# Space list + +The super administrator can perform the following operations in the space list: + +- Perform new cluster and cluster hosting operations + +- Recovery and deletion of unfinished spaces + +- Completed space deletion operation + +The space administrator can mainly perform the following operations in the space list: + +- View authorized space information + +## Completed space + +The super administrator can operate the completed space through the button to the right of the space name. Space administrators can click to enter the space to manage clusters or data in the space. + +![](/images/doris-manager/spacelist-1.png) + +## Unfinished space + +Doris Manger provides a draft save function of the space creation process to record the incomplete space creation process. Super administrators can view the list of unfinished spaces by switching tabs, and perform recovery or deletion operations. + +![](/images/doris-manager/spacelist-2.png) + +# New space + +There are two ways to create a new space: new cluster and cluster hosting. 
+
+### 3 Use
+
+Doris Manager presets a super administrator user with the following information:
+
+````
+Username: Admin
+Password: Admin@123
+````
+
+To ensure safe use, please change your password after logging in!
diff --git a/docs/en/ecosystem/doris-manager/initializing.md b/docs/en/ecosystem/doris-manager/initializing.md
new file mode 100644
index 0000000000..3c867d81c6
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/initializing.md
@@ -0,0 +1,43 @@
+---
+{
+    "title": "Initialize",
+    "language": "en"
+}
+---
+
+# Initialize
+
+After the deployment is complete, the super administrator needs to complete the local initialization.
+
+## Manage users
+
+The first step of initialization is to manage users, which mainly covers the selection and configuration of the authentication method. Currently Doris Manager supports local user authentication.
+
+![](/images/doris-manager/initializing-1.png)
+
+### Local user authentication
+
+Local user authentication is the user system that comes with Doris Manager. Users register by filling in a user name, email address, and password. Adding users, modifying user information, deleting users, and managing permissions are all done locally.
+
+![](/images/doris-manager/initializing-2.png)
+
+At this point, local initialization is complete. Super administrators can create spaces; space administrators can enter a space, manage it, and add or invite users into the space for data analysis.
\ No newline at end of file
diff --git a/docs/en/ecosystem/doris-manager/space-list.md b/docs/en/ecosystem/doris-manager/space-list.md
new file mode 100644
index 0000000000..543cb80973
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/space-list.md
@@ -0,0 +1,234 @@
+---
+{
+    "title": "Space list",
+    "language": "en"
+}
+---
+
+# Space list
+
+The super administrator can perform the following operations in the space list:
+
+- Create new clusters and take over (host) existing clusters
+
+- Restore or delete unfinished spaces
+
+- Delete completed spaces
+
+The space administrator can mainly perform the following operations in the space list:
+
+- View authorized space information
+
+## Completed space
+
+The super administrator can operate on a completed space through the buttons to the right of the space name. Space administrators can click to enter the space and manage the clusters or data in it.
+
+![](/images/doris-manager/spacelist-1.png)
+
+## Unfinished space
+
+Doris Manager saves drafts of the space creation process so that incomplete space creations are recorded. Super administrators can view the list of unfinished spaces by switching tabs, and restore or delete them.
+
+![](/images/doris-manager/spacelist-2.png)
+
+# New space
+
+There are two ways to create a new space: creating a new cluster and hosting an existing cluster.
+
+## New cluster
+
+### 1 Register the space
+
+Space information includes the space name, the space introduction, and the choice of space administrators.
+
+The space name and administrator are required; the space introduction is optional.
+
+![](/images/doris-manager/spacelist-3.png)
+
+### 2 Add host
+
+![](/images/doris-manager/spacelist-4.png)
+
+#### Configure passwordless SSH login
+
+Doris Manager needs to distribute the Agent installation package during installation, so passwordless SSH login must be configured on the server (agent01) where Doris is to be installed.
+
+```shell
+#1. Log in to the server; the account must be the same on the manager and agent machines
+su - xxx
+pwd
+#2. Generate a key pair on the machine where doris manager is deployed
+ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
+
+#3. Copy the public key to the machine agent01
+scp ~/.ssh/id_rsa.pub root@agent01:~
+
+#4. Log in to agent01 and append the public key to authorized_keys
+cat ~/id_rsa.pub >> .ssh/authorized_keys
+
+#5. After doing this, we can log in to agent01 without a password from the doris manager machine
+ssh agent01@xx.xxx.xx.xx
+```
+
+Note that the permissions of the .ssh directory must be 700, and the permissions of authorized_keys and the private key under it must be 600; otherwise passwordless login will fail due to permission issues. The known_hosts file is generated after the first login. Doris also needs to be started with the account configured for passwordless login.
+
+When installing a cluster in Doris Manager, just use the private key of the doris manager machine, i.e. ~/.ssh/id_rsa
+
+For details, please refer to: https://blog.csdn.net/universe_hao/article/details/52296811
+
+#### Host list
+
+Enter the host IPs to add new hosts; hosts can also be added in batches.
+
+### 3 Installation options
+
+#### Get the installation package
+
+When deploying a cluster through Doris Manager, you need to provide a compiled Doris installation package. You can compile it yourself from the Doris source code or use the official binary release.
+
+Doris Manager pulls the installation package over HTTP. If you need to set up your own HTTP service, please refer to the "Self-built http service" section at the bottom of this document.
+
+#### Specify the installation path
+
+Doris and the Doris Manager Agent will be installed in this directory. Make sure this directory is dedicated to Doris and related components.
+
+### 4 Verify the host
+
+The system automatically verifies the host status. When verification completes, the Agent starts sending back heartbeats and you can click to proceed to the next step.
+
+![](/images/doris-manager/spacelist-5.png)
+
+### 5 Planning Nodes
+
+Click the Assign Node button to plan FE/BE/Broker nodes for the hosts.
+
+![](/images/doris-manager/spacelist-6.png)
+
+### 6 Configuration Parameters
+
+Configure parameters for the nodes planned in the previous step. You can use the default values or turn on the custom configuration switch to customize the configuration.
+
+### 7 Deploy the cluster
+
+The system automatically verifies the installation progress on each host. When verification completes, the nodes are started and report heartbeats, and you can click to proceed to the next step.
+
+![](/images/doris-manager/spacelist-7.png)
+
+### 8 Complete the creation
+
+Completing the above steps finishes creating the new cluster.
+
+![](/images/doris-manager/spacelist-8.png)
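+
+As a quick sanity check, the newly created cluster can be queried through the FE's MySQL protocol. The sketch below is an assumption based on a default Doris deployment (query port 9030, root user with an empty password); it is not something Doris Manager sets up itself.
+
+```shell
+# Connect to any FE of the new cluster and list the registered nodes
+mysql -h FE_HOST -P 9030 -uroot -e "SHOW FRONTENDS; SHOW BACKENDS;"
+```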
+
+## Cluster hosting
+
+### 1 Register the space
+
+Space information includes the space name, the space introduction, and the choice of space administrators.
+
+The space name and administrator are required; the space introduction is optional.
+
+### 2 Connect to the cluster
+
+Cluster information includes the cluster address, HTTP port, JDBC port, cluster username, and cluster password. Fill these in according to your own cluster.
+
+Click the Link Test button to test the connection.
+
+### 3 Hosting Options
+
+![](/images/doris-manager/spacelist-9.png)
+
+#### Configure passwordless SSH login
+
+Doris Manager needs to distribute the Agent installation package during installation, so passwordless SSH login must be configured on the server (agent01) where Doris is to be installed.
+
+```shell
+#1. Log in to the server; the account must be the same on the manager and agent machines
+su - xxx
+pwd
+#2. Generate a key pair on the machine where doris manager is deployed
+ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
+
+#3. Copy the public key to the machine agent01
+scp ~/.ssh/id_rsa.pub root@agent01:~
+
+#4. Log in to agent01 and append the public key to authorized_keys
+cat ~/id_rsa.pub >> .ssh/authorized_keys
+
+#5. After doing this, we can log in to agent01 without a password from the doris manager machine
+ssh agent01@xx.xxx.xx.xx
+```
+
+Note that the permissions of the .ssh directory must be 700, and the permissions of authorized_keys and the private key under it must be 600; otherwise passwordless login will fail due to permission issues. The known_hosts file is generated after the first login. Doris also needs to be started with the account configured for passwordless login.
+
+When installing a cluster in Doris Manager, just use the private key of the doris manager machine, i.e. ~/.ssh/id_rsa
+
+For details, please refer to: https://blog.csdn.net/universe_hao/article/details/52296811
+
+#### Specify the installation path
+
+Doris and the Doris Manager Agent will be installed in this directory. Make sure this directory is dedicated to Doris and related components.
+
+### 4 Verify the host
+
+The system automatically verifies the host status. When verification completes, the Agent starts sending back heartbeats and you can click to proceed to the next step.
+
+![](/images/doris-manager/spacelist-10.png)
+
+### 5 Verify the cluster
+
+Cluster verification consists of instance installation verification, instance dependency verification, and instance startup verification. After verification succeeds, click Next to complete the creation.
+
+![](/images/doris-manager/spacelist-11.png)
+
+### 6 Complete access
+
+Completing the above steps finishes hosting the cluster.
+
+## Self-built http service
+
+### 1 yum source installation
+
+1. Install: `yum install -y nginx`
+2. Start: `systemctl start nginx`
+
+### 2 Source installation
+
+Reference: https://www.runoob.com/linux/nginx-install-setup.html
+
+### 3 Configuration
+
+1. Put the Doris installation package in the nginx root directory:
+`mv PALO-0.15.1-rc03-binary.tar.gz /usr/share/nginx/html`
+
+2. Modify nginx.conf:
+
+````
+location /download {
+        alias /home/work/nginx/nginx/html/;
+}
+````
+
+After the modification, restart nginx and access:
+https://host:port/download/PALO-0.15.1-rc03-binary.tar.gz
\ No newline at end of file
diff --git a/docs/en/ecosystem/doris-manager/space-management.md b/docs/en/ecosystem/doris-manager/space-management.md
new file mode 100644
index 0000000000..c6df3098e4
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/space-management.md
@@ -0,0 +1,53 @@
+---
+{
+    "title": "Space management",
+    "language": "en"
+}
+---
+
+# Space management
+
+If you are a Doris Manager space administrator, you have permission to set up and manage the space, its members, roles, and so on.
+
+## Space
+
+The space administrator can mainly perform the following operations under the space module:
+
+- Edit saved space information
+
+### Edit space information
+
+If the space information is complete, space administrators can view and edit space-related information here, including the space name, space introduction, space administrators, etc.
+
+![](/images/doris-manager/spacemanagement-1.png)
+
+## Members
+
+In the secondary navigation bar of the "Space Management" interface, select "Members" to enter the member management page. On this page you can view all users in the current space and remove members.
+
+![](/images/doris-manager/spacemanagement-2.png)
+
+## Role
+
+Space administrators can view all roles and role members by clicking the "Roles" button in the navigation bar. New members belong to the "Space Member" role and the "Space Admin" role by default. The default roles are "Space Admin" and "Space Member" and cannot be changed by other administrators.
+
+![](/images/doris-manager/spacemanagement-3.png)
diff --git a/docs/en/ecosystem/doris-manager/system-settings.md b/docs/en/ecosystem/doris-manager/system-settings.md
new file mode 100644
index 0000000000..bae7c2a16f
--- /dev/null
+++ b/docs/en/ecosystem/doris-manager/system-settings.md
@@ -0,0 +1,91 @@
+---
+{
+    "title": "System settings",
+    "language": "en"
+}
+---
+
+# System settings
+
+The super administrator can mainly perform the following operations under the platform module:
+
+- Perform relevant operations on platform users
+- Hold the highest level of authority on the platform
+
+The user permissions are described at the end of this page.
+
+## Users
+
+### User management under local authentication
+
+Click the Add User button to create a new user by providing a username and email address.
+
+Doris Manager will assign a temporary password to the new user. The new user needs to log in with the configured username/email and the temporary password, and can then create a new password in "Account Settings".
+
+![](/images/doris-manager/systemsettings-1.png)
+
+![](/images/doris-manager/systemsettings-2.png)
+
+### Edit User
+
+Super administrators can manage users, including editing user information, resetting user passwords, and deactivating users.
+
+#### Edit user information
+
+Select a user and click "Edit" to modify the user name and email address. If the email address is updated, the user needs to log in with the new address; the password is not changed.
+
+![](/images/doris-manager/systemsettings-3.png)
+
+#### Reset user password
+
+Select "Reset Password"; after confirming the operation, Doris Manager reassigns a temporary password to the user. The user needs to log in with the registered email address and the new temporary password, and can then create a new password in "Account Settings".
+ + +#### Deactivate/Activate User + +Click Opt-out user, and after confirming to deactivate the user, the user's status will be changed from active to inactive. Deactivated users will not be able to log in to Doris Manger. + +Click Activate User on the right side of the user to reactivate the user. The user's status will be changed back to enabled and will be able to log in to Doris Manger again. + +Note that super administrators cannot deactivate their own user accounts, and there must be at least one non-deactivated super administrator user in the system. + +![](/images/doris-manager/systemsettings-4.png) + + +## User permission description + +### Super administrator privileges + +| | Create | Edit | Delete | View | +| :---- | :----- | :--- | :----- | :--- | +| User | ✓ | ✓ | ✓ | ✓ | +| Roles | ✓ | ✓ | ✓ | ✓ | +| Space | ✓ | ✓ | ✓ | ✓ | + +### Space administrator permissions + +| | Create | Edit | Delete | View | +| :---- | :----- | :--- | :----- | :--- | +| User | X | X | X | X | +| Roles | X | X | X | ✓ | +| Space | X | ✓ | X | ✓ | diff --git a/new-docs/en/ecosystem/external-table/doris-on-es.md b/docs/en/ecosystem/external-table/doris-on-es.md similarity index 100% rename from new-docs/en/ecosystem/external-table/doris-on-es.md rename to docs/en/ecosystem/external-table/doris-on-es.md diff --git a/new-docs/en/ecosystem/external-table/hive-of-doris.md b/docs/en/ecosystem/external-table/hive-of-doris.md similarity index 100% rename from new-docs/en/ecosystem/external-table/hive-of-doris.md rename to docs/en/ecosystem/external-table/hive-of-doris.md diff --git a/new-docs/en/ecosystem/external-table/iceberg-of-doris.md b/docs/en/ecosystem/external-table/iceberg-of-doris.md similarity index 100% rename from new-docs/en/ecosystem/external-table/iceberg-of-doris.md rename to docs/en/ecosystem/external-table/iceberg-of-doris.md diff --git a/new-docs/en/ecosystem/external-table/odbc-of-doris.md b/docs/en/ecosystem/external-table/odbc-of-doris.md similarity index 100% rename from new-docs/en/ecosystem/external-table/odbc-of-doris.md rename to docs/en/ecosystem/external-table/odbc-of-doris.md diff --git a/new-docs/en/ecosystem/flink-doris-connector.md b/docs/en/ecosystem/flink-doris-connector.md similarity index 100% rename from new-docs/en/ecosystem/flink-doris-connector.md rename to docs/en/ecosystem/flink-doris-connector.md diff --git a/new-docs/en/ecosystem/logstash.md b/docs/en/ecosystem/logstash.md similarity index 100% rename from new-docs/en/ecosystem/logstash.md rename to docs/en/ecosystem/logstash.md diff --git a/docs/en/extending-doris/plugin-development-manual.md b/docs/en/ecosystem/plugin-development-manual.md similarity index 100% rename from docs/en/extending-doris/plugin-development-manual.md rename to docs/en/ecosystem/plugin-development-manual.md diff --git a/new-docs/en/ecosystem/seatunnel/flink-sink.md b/docs/en/ecosystem/seatunnel/flink-sink.md similarity index 100% rename from new-docs/en/ecosystem/seatunnel/flink-sink.md rename to docs/en/ecosystem/seatunnel/flink-sink.md diff --git a/new-docs/en/ecosystem/seatunnel/spark-sink.md b/docs/en/ecosystem/seatunnel/spark-sink.md similarity index 100% rename from new-docs/en/ecosystem/seatunnel/spark-sink.md rename to docs/en/ecosystem/seatunnel/spark-sink.md diff --git a/new-docs/en/ecosystem/spark-doris-connector.md b/docs/en/ecosystem/spark-doris-connector.md similarity index 100% rename from new-docs/en/ecosystem/spark-doris-connector.md rename to docs/en/ecosystem/spark-doris-connector.md diff --git 
a/new-docs/en/ecosystem/udf/contribute-udf.md b/docs/en/ecosystem/udf/contribute-udf.md similarity index 100% rename from new-docs/en/ecosystem/udf/contribute-udf.md rename to docs/en/ecosystem/udf/contribute-udf.md diff --git a/new-docs/en/ecosystem/udf/native-user-defined-function.md b/docs/en/ecosystem/udf/native-user-defined-function.md similarity index 100% rename from new-docs/en/ecosystem/udf/native-user-defined-function.md rename to docs/en/ecosystem/udf/native-user-defined-function.md diff --git a/new-docs/en/ecosystem/udf/remote-user-defined-function.md b/docs/en/ecosystem/udf/remote-user-defined-function.md similarity index 100% rename from new-docs/en/ecosystem/udf/remote-user-defined-function.md rename to docs/en/ecosystem/udf/remote-user-defined-function.md diff --git a/docs/en/extending-doris/datax.md b/docs/en/extending-doris/datax.md deleted file mode 100644 index c8762d68a4..0000000000 --- a/docs/en/extending-doris/datax.md +++ /dev/null @@ -1,104 +0,0 @@ ---- -{ - "title": "DataX doriswriter", - "language": "en" -} ---- - - - -# DataX doriswriter - -[DataX](https://github.com/alibaba/DataX) doriswriter plug-in, used to synchronize data from other data sources to Doris through DataX. - -The plug-in uses Doris' Stream Load function to synchronize and import data. It needs to be used with DataX service. - -## About DataX - -DataX is an open source version of Alibaba Cloud DataWorks data integration, an offline data synchronization tool/platform widely used in Alibaba Group. DataX implements efficient data synchronization functions between various heterogeneous data sources including MySQL, Oracle, SqlServer, Postgre, HDFS, Hive, ADS, HBase, TableStore (OTS), MaxCompute (ODPS), Hologres, DRDS, etc. - -More details can be found at: `https://github.com/alibaba/DataX/` - -## Usage - -The code of DataX doriswriter plug-in can be found [here](https://github.com/apache/incubator-doris/tree/master/extension/DataX). - -This directory is the doriswriter plug-in development environment of Alibaba DataX. - -Because the doriswriter plug-in depends on some modules in the DataX code base, and these module dependencies are not submitted to the official Maven repository, when we develop the doriswriter plug-in, we need to download the complete DataX code base to facilitate our development and compilation of the doriswriter plug-in. - -### Directory structure - -1. `doriswriter/` - - This directory is the code directory of doriswriter, and this part of the code should be in the Doris code base. - - The help doc can be found in `doriswriter/doc` - -2. `init-env.sh` - - The script mainly performs the following steps: - - 1. Git clone the DataX code base to the local - 2. Softlink the `doriswriter/` directory to `DataX/doriswriter`. - 3. Add `doriswriter` to the original `DataX/pom.xml` - 4. Change httpclient version from 4.5 to 4.5.13 in DataX/core/pom.xml - - > httpclient v4.5 can not handle redirect 307 correctly. - - After that, developers can enter `DataX/` for development. And the changes in the `DataX/doriswriter` directory will be reflected in the `doriswriter/` directory, which is convenient for developers to submit code. - -### How to build - -1. Run `init-env.sh` -2. Modify code of doriswriter in `DataX/doriswriter` if you need. -3. Build doriswriter - - 1. Build doriswriter along: - - `mvn clean install -pl plugin-rdbms-util,doriswriter -DskipTests` - - 2. Build DataX: - - `mvn package assembly:assembly -Dmaven.test.skip=true` - - The output will be in `target/datax/datax/`. 
- - > hdfsreader, hdfswriter and oscarwriter needs some extra jar packages. If you don't need to use these components, you can comment out the corresponding module in DataX/pom.xml. - - 3. Compilation error - - If you encounter the following compilation errors: - - ``` - Could not find artifact com.alibaba.datax:datax-all:pom:0.0.1-SNAPSHOT ... - ``` - - You can try the following solutions: - - 1. Download [alibaba-datax-maven-m2-20210928.tar.gz](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/alibaba-datax-maven-m2-20210928.tar.gz) - 2. After decompression, copy the resulting `alibaba/datax/` directory to `.m2/repository/com/alibaba/` corresponding to the maven used. - 3. Try to compile again. - -4. Commit code of doriswriter in `doriswriter` if you need. - -### Example - -For instructions on using the doriswriter plug-in, please refer to [here](https://github.com/apache/incubator-doris/blob/master/extension/DataX/doriswriter/doc/doriswriter.md). diff --git a/docs/en/extending-doris/doris-on-es.md b/docs/en/extending-doris/doris-on-es.md deleted file mode 100644 index 79aa207109..0000000000 --- a/docs/en/extending-doris/doris-on-es.md +++ /dev/null @@ -1,589 +0,0 @@ ---- -{ - "title": "Doris On ES", - "language": "en" -} ---- - - - -# Doris On ES - -Doris-On-ES not only take advantage of Doris's distributed query planning capability but also ES (Elastic search)'s full-text search capability, provide a more complete OLAP scenario solution: - -1. Multi-index Distributed Join Query in ES -2. Joint Query of Tables in Doris and ES, More Complex Full-Text Retrieval and Filtering - -This document mainly introduces the realization principle and usage of this function. - -## Glossary - -### Noun in Doris - -* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. -* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. - -### Noun in ES - -* DataNode: The data storage and computing node of ES. -* MasterNode: The Master node of ES, which manages metadata, nodes, data distribution, etc. -* scroll: The built-in data set cursor feature of ES for streaming scanning and filtering of data. 
-* _source: contains the original JSON document body that was passed at index time -* doc_values: store the same values as the _source but in a column-oriented fashion -* keyword: string datatype in ES, but the content not analyzed by analyzer -* text: string datatype in ES, the content analyzed by analyzer - - -## How To Use - -### Create ES Index - -``` -PUT test -{ - "settings": { - "index": { - "number_of_shards": "1", - "number_of_replicas": "0" - } - }, - "mappings": { - "doc": { // There is no need to specify the type when creating indexes after ES7.x version, there is one and only type of `_doc` - "properties": { - "k1": { - "type": "long" - }, - "k2": { - "type": "date" - }, - "k3": { - "type": "keyword" - }, - "k4": { - "type": "text", - "analyzer": "standard" - }, - "k5": { - "type": "float" - } - } - } - } -} -``` - -### Add JSON documents to ES index - -``` -POST /_bulk -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "Trying out Elasticsearch", "k4": "Trying out Elasticsearch", "k5": 10.0} -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "Trying out Doris", "k4": "Trying out Doris", "k5": 10.0} -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "Doris On ES", "k4": "Doris On ES", "k5": 10.0} -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "Doris", "k4": "Doris", "k5": 10.0} -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "ES", "k4": "ES", "k5": 10.0} -``` - -### Create external ES table - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH // ENGINE must be Elasticsearch -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", - -"user" = "root", -"password" = "root" -); -``` - -The following parameters are accepted by ES table: - -Parameter | Description ----|--- -**hosts** | ES Cluster Connection Address, maybe one or more node, load-balance is also accepted -**index** | the related ES index name, alias is supported, and if you use doc_value, you need to use the real name -**type** | the type for this index, If not specified, `_doc` will be used -**user** | username for ES -**password** | password for the user - -* For clusters before 7.x, please pay attention to choosing the correct type when building the table -* The authentication method only supports Http Basic authentication, need to ensure that this user has access to: /\_cluster/state/, \_nodes/http and other paths and index read permissions;The cluster has not turned on security authentication, and the user name and password do not need to be set -* The column names in the Doris table need to exactly match the field names in the ES, and the field types should be as consistent as possible -* **ENGINE** must be: **Elasticsearch** - -##### Filter to push down - -An important ability of `Doris On ES` is the push-down of filter conditions: The filtering conditions are pushed to ES, so that only the data that really meets the conditions will be returned, which can significantly improve query performance and reduce CPU, memory, and IO utilization of Doris and ES - -The following operators (Operators) will be optimized to the following ES Query: - -| SQL syntax | ES 5.x+ syntax | -|-------|:---:| -| = | term query| -| in | terms query | -| > , < , 
>= , ⇐ | range query | -| and | bool.filter | -| or | bool.should | -| not | bool.must_not | -| not in | bool.must_not + terms query | -| is\_not\_null | exists query | -| is\_null | bool.must_not + exists query | -| esquery | QueryDSL in ES native json form | - -##### Data type mapping - -Doris\ES | byte | short | integer | long | float | double| keyword | text | date -------------- | ------------- | ------ | ---- | ----- | ---- | ------ | ----| --- | --- | -tinyint | √ | | | | | | | | -smallint | √ | √ | | | | | | | -int | √ | √ | √ | | | | | | -bigint | √ | √ | √ | √ | | | | | -float | | | | | √ | | | | -double | | | | | | √ | | | -char | | | | | | | √ | √ | -varchar | | | | | | | √ | √ | -date | | | | | | | | | √| -datetime | | | | | | | | | √| - - -### Enable column scan to optimize query speed(enable\_docvalue\_scan=true) - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", -"user" = "root", -"password" = "root", - -"enable_docvalue_scan" = "true" -); -``` - -Parameter Description: - -Parameter | Description ----|--- -**enable\_docvalue\_scan** | whether to enable ES/Lucene column storage to get the value of the query field, the default is false - -Doris obtains data from ES following the following two principles: - -* **Best effort**: Automatically detect whether the column to be read has column storage enabled (doc_value: true).If all the fields obtained have column storage, Doris will obtain the values ​​of all fields from the column storage(doc_values) -* **Automatic downgrade**: If the field to be obtained has one or more field that is not have doc_value, the values ​​of all fields will be parsed from the line store `_source` - -##### Advantage: - -By default, Doris On ES will get all the required columns from the row storage, which is `_source`, and the storage of `_source` is the origin json format document, Inferior to column storage in batch read performance, Especially obvious when only a few columns are needed, When only a few columns are obtained, the performance of docvalue is about ten times that of _source - -##### Tip -1. Fields of type `text` are not column-stored in ES, so if the value of the field to be obtained has a field of type `text`, it will be automatically downgraded to get from `_source` -2. In the case of too many fields obtained (`>= 25`), the performance of getting field values ​​from `docvalue` will be basically the same as getting field values ​​from `_source` - - -### Detect keyword type field(enable\_keyword\_sniff=true) - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", -"user" = "root", -"password" = "root", - -"enable_keyword_sniff" = "true" -); -``` - -Parameter Description: - -Parameter | Description ----|--- -**enable\_keyword\_sniff** | Whether to detect the string type (**text**) `fields` in ES to obtain additional not analyzed (**keyword**) field name(multi-fields mechanism) - -You can directly import data without creating an index. 
At this time, ES will automatically create a new index in ES, For a field of type string, a field of type `text` and field of type `keyword` will be created meantime, This is the multi-fields feature of ES, mapping is as follows: - -``` -"k4": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } -} -``` -When performing conditional filtering on k4, for example =, Doris On ES will convert the query to ES's TermQuery - -SQL filter: - -``` -k4 = "Doris On ES" -``` - -The query DSL converted into ES is: - -``` -"term" : { - "k4": "Doris On ES" - -} -``` - -Because the first field type of k4 is `text`, when data is imported, it will perform word segmentation processing according to the word segmentator set by k4 (if it is not set, it is the standard word segmenter) to get three Term of doris, on, and es, as follows ES analyze API analysis: - -``` -POST /_analyze -{ - "analyzer": "standard", - "text": "Doris On ES" -} -``` -The result of analyzed is: - -``` -{ - "tokens": [ - { - "token": "doris", - "start_offset": 0, - "end_offset": 5, - "type": "", - "position": 0 - }, - { - "token": "on", - "start_offset": 6, - "end_offset": 8, - "type": "", - "position": 1 - }, - { - "token": "es", - "start_offset": 9, - "end_offset": 11, - "type": "", - "position": 2 - } - ] -} -``` -The query uses: - -``` -"term" : { - "k4": "Doris On ES" -} -``` -This term does not match any term in the dictionary, and will not return any results, enable `enable_keyword_sniff: true` will automatically convert `k4 = "Doris On ES"` into `k4.keyword = "Doris On ES"`to exactly match SQL semantics, The converted ES query DSL is: - -``` -"term" : { - "k4.keyword": "Doris On ES" -} -``` - -The type of `k4.keyword` is `keyword`, and writing data into ES is a complete term, so it can be matched - -### Enable node discovery mechanism, default is true(es\_nodes\_discovery=true) - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", -"user" = "root", -"password" = "root", - -"nodes_discovery" = "true" -); -``` - -Parameter Description: - -Parameter | Description ----|--- -**es\_nodes\_discovery** | Whether or not to enable ES node discovery. the default is true - -Doris would find all available related data nodes (shards allocated on)from ES when this is true. Just set false if address of ES data nodes are not accessed by Doris BE, eg. 
the ES cluster is deployed in the intranet which isolated from your public Internet, and users access through a proxy - -### Whether ES cluster enables https access mode, if enabled should set value with`true`, default is false(http\_ssl\_enable=true) - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", -"user" = "root", -"password" = "root", - -"http_ssl_enabled" = "true" -); -``` - -Parameter Description: - -Parameter | Description ----|--- -**http\_ssl\_enabled** | Whether ES cluster enables https access mode - -The current FE/BE implementation is to trust all, this is a temporary solution, and the real user configuration certificate will be used later - -### Query usage - -After create the ES external table in Doris, there is no difference except that the data model (rollup, pre-aggregation, materialized view, etc.) with other table in Doris - -#### Basic usage - -``` -select * from es_table where k1 > 1000 and k3 ='term' or k4 like 'fu*z_' -``` - -#### Extended esquery(field, QueryDSL) -Through the `esquery(field, QueryDSL)` function, some queries that cannot be expressed in sql, such as match_phrase, geoshape, etc., are pushed down to the ES for filtering. The first column name parameter of `esquery` is used to associate the `index`, the second This parameter is the basic JSON expression of ES's `Query DSL`, which is contained in curly braces `{}`, and there can be only one root key of json, such as match_phrase, geo_shape, bool, etc. -Match query: - -``` -select * from es_table where esquery(k4, '{ - "match": { - "k4": "doris on es" - } - }'); -``` -Geo related queries: - -``` -select * from es_table where esquery(k4, '{ - "geo_shape": { - "location": { - "shape": { - "type": "envelope", - "coordinates": [ - [ - 13, - 53 - ], - [ - 14, - 52 - ] - ] - }, - "relation": "within" - } - } - }'); -``` - -Bool query: - -``` -select * from es_table where esquery(k4, ' { - "bool": { - "must": [ - { - "terms": { - "k1": [ - 11, - 12 - ] - } - }, - { - "terms": { - "k2": [ - 100 - ] - } - } - ] - } - }'); -``` - - - -## Principle - -``` -+----------------------------------------------+ -| | -| Doris +------------------+ | -| | FE +--------------+-------+ -| | | Request Shard Location -| +--+-------------+-+ | | -| ^ ^ | | -| | | | | -| +-------------------+ +------------------+ | | -| | | | | | | | | -| | +----------+----+ | | +--+-----------+ | | | -| | | BE | | | | BE | | | | -| | +---------------+ | | +--------------+ | | | -+----------------------------------------------+ | - | | | | | | | - | | | | | | | - | HTTP SCROLL | | HTTP SCROLL | | -+-----------+---------------------+------------+ | -| | v | | v | | | -| | +------+--------+ | | +------+-------+ | | | -| | | | | | | | | | | -| | | DataNode | | | | DataNode +<-----------+ -| | | | | | | | | | | -| | | +<--------------------------------+ -| | +---------------+ | | |--------------| | | | -| +-------------------+ +------------------+ | | -| Same Physical Node | | -| | | -| +-----------------------+ | | -| | | | | -| | MasterNode +<-----------------+ -| ES | | | -| +-----------------------+ | -+----------------------------------------------+ - - -``` - -1. FE requests the hosts specified by the table to obtain node‘s HTTP port, shards location of the index. 
If the request fails, it will traverse the host list sequentially until it succeeds or fails completely. - -2. When querying, the query plan will be generated and sent to the corresponding BE node according to some node information obtained by FE and metadata information of index. - -3. The BE node requests locally deployed ES nodes in accordance with the `proximity principle`. The BE receives data concurrently from each fragment of ES index in the `HTTP Scroll` mode. - -4. After calculating the result, return it to client - -## Best Practices - -### Suggestions for using Date type fields - -The use of Datetype fields in ES is very flexible, but in Doris On ES, if the type of the Date type field is not set properly, it will cause the filter condition cannot be pushed down. - -When creating an index, do maximum format compatibility with the setting of the Date type format: - -``` - "dt": { - "type": "date", - "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis" - } -``` - -When creating this field in Doris, it is recommended to set it to `date` or `datetime`, and it can also be set to `varchar` type. The following SQL statements can be used to directly push the filter condition down to ES - - -``` -select * from doe where k2 > '2020-06-21'; - -select * from doe where k2 < '2020-06-21 12:00:00'; - -select * from doe where k2 < 1593497011; - -select * from doe where k2 < now(); - -select * from doe where k2 < date_format(now(), '%Y-%m-%d'); -``` - -`Notice`: - -* If you don’t set the format for the time type field In ES, the default format for Date-type field is - -``` -strict_date_optional_time||epoch_millis -``` -* If the date field indexed into ES is unix timestamp, it needs to be converted to `ms`, and the internal timestamp of ES is processed according to `ms` unit, otherwise Doris On ES will display wrong column data - -### Fetch ES metadata field `_id` - -When indexing documents without specifying `_id`, ES will assign a globally unique `_id` field to each document. Users can also specify a `_id` with special represent some business meaning for the document when indexing; if needed, Doris On ES can get the value of this field by adding the `_id` field of type `varchar` when creating the ES external table - -``` -CREATE EXTERNAL TABLE `doe` ( - `_id` varchar COMMENT "", - `city` varchar COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://127.0.0.1:8200", -"user" = "root", -"password" = "root", -"index" = "doe", -"type" = "doc" -} -``` -`Notice`: - -1. The filtering condition of the `_id` field only supports two types: `=` and `in` -2. The `_id` field can only be of type `varchar` - -## Q&A - -1. ES Version Requirements - - The main version of ES is larger than 5. The scanning mode of ES data before 2. X and after 5. x is different. At present, the scanning mode of ES data after 5. x is supported. - -2. Does ES Cluster Support X-Pack Authentication - - Support all ES clusters using HTTP Basic authentication - -3. Some queries are much slower than requesting ES - - Yes, for example, query related to _count, etc., the ES internal will directly read the number of documents that meet the requirements of the relevant metadata, without the need to filter the real data. - -4. 
Whether the aggregation operation can be pushed down - - At present, Doris On ES does not support push-down operations such as sum, avg, min/max, etc., all documents satisfying the conditions are obtained from the ES in batch flow, and then calculated in Doris diff --git a/docs/en/extending-doris/flink-doris-connector.md b/docs/en/extending-doris/flink-doris-connector.md deleted file mode 100644 index acd7c016f8..0000000000 --- a/docs/en/extending-doris/flink-doris-connector.md +++ /dev/null @@ -1,496 +0,0 @@ ---- -{ - "title": "Flink Doris Connector", - "language": "en" -} ---- - - - -# Flink Doris Connector - -- The Flink Doris Connector can support operations (read, insert, modify, delete) data stored in Doris through Flink. - -Github: https://github.com/apache/incubator-doris-flink-connector - -* `Doris` table can be mapped to `DataStream` or `Table`. - ->**Note:** -> ->1. Modification and deletion are only supported on the Unique Key model ->2. The current deletion is to support Flink CDC to access data to achieve automatic deletion. If it is to delete other data access methods, you need to implement it yourself. For the data deletion usage of Flink CDC, please refer to the last section of this document - -## Version Compatibility - -| Connector | Flink | Doris | Java | Scala | -| --------- | ----- | ------ | ---- | ----- | -| 1.11.6-2.12-xx | 1.11.x | 0.13+ | 8 | 2.12 | -| 1.12.7-2.12-xx | 1.12.x | 0.13.+ | 8 | 2.12 | -| 1.13.5-2.12-xx | 1.13.x | 0.13.+ | 8 | 2.12 | -| 1.14.4-2.12-xx | 1.14.x | 0.13.+ | 8 | 2.12 | - -## Build and Install - -Ready to work - -1.Modify the `custom_env.sh.tpl` file and rename it to `custom_env.sh` - -2.Specify the thrift installation directory - -```bash -##source file content -#export THRIFT_BIN= -#export MVN_BIN= -#export JAVA_HOME= - -##amend as below,MacOS as an example -export THRIFT_BIN=/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift -#export MVN_BIN= -#export JAVA_HOME= - -Install `thrift` 0.13.0 (Note: `Doris` 0.15 and the latest builds are based on `thrift` 0.13.0, previous versions are still built with `thrift` 0.9.3) -Windows: - 1. Download: `http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe` - 2. Modify thrift-0.13.0.exe to thrift - -MacOS: - 1. Download: `brew install thrift@0.13.0` - 2. default address: /opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift - -Note: Executing `brew install thrift@0.13.0` on MacOS may report an error that the version cannot be found. The solution is as follows, execute it in the terminal: - 1. `brew tap-new $USER/local-tap` - 2. `brew extract --version='0.13.0' thrift $USER/local-tap` - 3. 
`brew install thrift@0.13.0` - Reference link: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c` - -Linux: - 1.Download source package: `wget https://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz` - 2.Install dependencies: `yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++` - 3.`tar zxvf thrift-0.13.0.tar.gz` - 4.`cd thrift-0.13.0` - 5.`./configure --without-tests` - 6.`make` - 7.`make install` - Check the version after installation is complete: thrift --version - Note: If you have compiled Doris, you do not need to install thrift, you can directly use $DORIS_HOME/thirdparty/installed/bin/thrift -``` - -Execute following command in source dir: - -```bash -sh build.sh - - Usage: - build.sh --flink version --scala version # specify flink and scala version - build.sh --tag # this is a build from tag - e.g.: - build.sh --flink 1.14.3 --scala 2.12 - build.sh --tag - -Then, for example, execute the command to compile according to the version you need: -sh build.sh --flink 1.14.3 --scala 2.12 -``` - -> Note: If you check out the source code from tag, you can just run `sh build.sh --tag` without specifying the Flink and Scala versions. This is because the version in the tag source code is fixed. For example, `1.13.5_2.12-1.0.1` means Flink version 1.13.5, scala version 2.12, and connector version 1.0.1. - -After successful compilation, the file `flink-doris-connector-1.14_2.12-1.0.0-SNAPSHOT.jar` will be generated in the `output/` directory. Copy this file to `ClassPath` in `Flink` to use `flink-doris-connector`. For example, `Flink` running in `Local` mode, put this file in the `jars/` folder. `Flink` running in `Yarn` cluster mode, put this file in the pre-deployment package. - -**Remarks:** - -1. Doris FE should be configured to enable http v2 in the configuration -2. Scala version currently supports 2.12 and 2.11 - -conf/fe.conf - -``` -enable_http_server_v2 = true -``` -## Using Maven - -Add flink-doris-connector and necessary Flink Maven dependencies - -Flink 1.13.* and earlier version - -``` - - org.apache.flink - flink-java - ${flink.version} - provided - - - org.apache.flink - flink-streaming-java_${scala.version} - ${flink.version} - provided - - - org.apache.flink - flink-clients_${scala.version} - ${flink.version} - provided - - - - org.apache.flink - flink-table-common - ${flink.version} - provided - - - org.apache.flink - flink-table-api-java-bridge_${scala.version} - ${flink.version} - provided - - - org.apache.flink - flink-table-planner-blink_${scala.version} - ${flink.version} - provided - - - - org.apache.doris - flink-doris-connector-1.13_2.12 - - - 1.0.3 - -``` - -Flink 1.14.* version - -``` - - org.apache.flink - flink-java - ${flink.version} - provided - - - org.apache.flink - flink-streaming-java_${scala.version} - ${flink.version} - provided - - - org.apache.flink - flink-clients_${scala.version} - ${flink.version} - provided - - - - org.apache.flink - flink-table-planner_${scala.version} - ${flink.version} - provided - - - - org.apache.doris - flink-doris-connector-1.14_2.12 - 1.0.3 - -``` - -**Notes** - -1.Please replace the corresponding Connector and Flink dependency versions according to different Flink and Scala versions. -2.At present, only the scala2.12 version of the package is provided in maven. The 2.11 version of the package needs to be compiled by itself. Please refer to the compilation and installation section above. 
- -## How to use - -There are three ways to use Flink Doris Connector. - -* SQL -* DataStream -* DataSet - -### Parameters Configuration - -Flink Doris Connector Sink writes data to Doris by the `Stream Load`, and also supports the configurations of `Stream Load` - -* SQL configured by `sink.properties.` in the `WITH` -* DataStream configured by `DorisExecutionOptions.builder().setStreamLoadProp(Properties)` - - -### SQL - -* Source - -```sql -CREATE TABLE flink_doris_source ( - name STRING, - age INT, - price DECIMAL(5,2), - sale DOUBLE - ) - WITH ( - 'connector' = 'doris', - 'fenodes' = '$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT', - 'table.identifier' = '$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME', - 'username' = '$YOUR_DORIS_USERNAME', - 'password' = '$YOUR_DORIS_PASSWORD' -); -``` - -* Sink - -```sql -CREATE TABLE flink_doris_sink ( - name STRING, - age INT, - price DECIMAL(5,2), - sale DOUBLE - ) - WITH ( - 'connector' = 'doris', - 'fenodes' = '$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT', - 'table.identifier' = '$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME', - 'username' = '$YOUR_DORIS_USERNAME', - 'password' = '$YOUR_DORIS_PASSWORD' -); -``` - -* Insert - -```sql -INSERT INTO flink_doris_sink select name,age,price,sale from flink_doris_source -``` - -### DataStream - -* Source - -```java - Properties properties = new Properties(); - properties.put("fenodes","FE_IP:8030"); - properties.put("username","root"); - properties.put("password",""); - properties.put("table.identifier","db.table"); - env.addSource(new DorisSourceFunction( - new DorisStreamOptions(properties), - new SimpleListDeserializationSchema() - ) - ).print(); -``` - -* Sink - -Json Stream - -```java -Properties pro = new Properties(); -pro.setProperty("format", "json"); -pro.setProperty("strip_outer_array", "true"); -env.fromElements( - "{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}" - ) - .addSink( - DorisSink.sink( - DorisReadOptions.builder().build(), - DorisExecutionOptions.builder() - .setBatchSize(3) - .setBatchIntervalMs(0l) - .setMaxRetries(3) - .setStreamLoadProp(pro).build(), - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); -``` - -Json Stream - -```java -env.fromElements( - "{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}" - ) - .addSink( - DorisSink.sink( - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); -``` - -RowData Stream - -```java -DataStream source = env.fromElements("") - .map(new MapFunction() { - @Override - public RowData map(String value) throws Exception { - GenericRowData genericRowData = new GenericRowData(3); - genericRowData.setField(0, StringData.fromString("北京")); - genericRowData.setField(1, 116.405419); - genericRowData.setField(2, 39.916927); - return genericRowData; - } - }); - -String[] fields = {"city", "longitude", "latitude"}; -LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()}; - -source.addSink( - DorisSink.sink( - fields, - types, - DorisReadOptions.builder().build(), - DorisExecutionOptions.builder() - .setBatchSize(3) - .setBatchIntervalMs(0L) - .setMaxRetries(3) - .build(), - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); -``` - -### DataSet - -* Sink - -```java -MapOperator data = 
env.fromElements("") - .map(new MapFunction() { - @Override - public RowData map(String value) throws Exception { - GenericRowData genericRowData = new GenericRowData(3); - genericRowData.setField(0, StringData.fromString("北京")); - genericRowData.setField(1, 116.405419); - genericRowData.setField(2, 39.916927); - return genericRowData; - } - }); - -DorisOptions dorisOptions = DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build(); -DorisReadOptions readOptions = DorisReadOptions.defaults(); -DorisExecutionOptions executionOptions = DorisExecutionOptions.defaults(); - -LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()}; -String[] fields = {"city", "longitude", "latitude"}; - -DorisDynamicOutputFormat outputFormat = new DorisDynamicOutputFormat( - dorisOptions, readOptions, executionOptions, types, fields - ); - -outputFormat.open(0, 1); -data.output(outputFormat); -outputFormat.close(); -``` - - - -### General - -| Key | Default Value | Comment | -| -------------------------------- | ----------------- | ------------------------------------------------------------ | -| fenodes | -- | Doris FE http address, support multiple addresses, separated by commas | -| table.identifier | -- | Doris table identifier, eg, db1.tbl1 | -| username | -- | Doris username | -| password | -- | Doris password | -| doris.request.retries | 3 | Number of retries to send requests to Doris | -| doris.request.connect.timeout.ms | 30000 | Connection timeout for sending requests to Doris | -| doris.request.read.timeout.ms | 30000 | Read timeout for sending request to Doris | -| doris.request.query.timeout.s | 3600 | Query the timeout time of Doris, the default is 1 hour, -1 means no timeout limit | -| doris.request.tablet.size | Integer.MAX_VALUE | The number of Doris Tablets corresponding to an Partition. The smaller this value is set, the more partitions will be generated. This will increase the parallelism on the Flink side, but at the same time will cause greater pressure on Doris. | -| doris.batch.size | 1024 | The maximum number of rows to read data from BE at one time. Increasing this value can reduce the number of connections between Flink and Doris. Thereby reducing the extra time overhead caused by network delay. | -| doris.exec.mem.limit | 2147483648 | Memory limit for a single query. The default is 2GB, in bytes. | -| doris.deserialize.arrow.async | false | Whether to support asynchronous conversion of Arrow format to RowBatch required for flink-doris-connector iteration | -| doris.deserialize.queue.size | 64 | Asynchronous conversion of the internal processing queue in Arrow format takes effect when doris.deserialize.arrow.async is true | -| doris.read.field | -- | List of column names in the Doris table, separated by commas | -| doris.filter.query | -- | Filter expression of the query, which is transparently transmitted to Doris. Doris uses this expression to complete source-side data filtering. | -| sink.batch.size | 10000 | Maximum number of lines in a single write BE | -| sink.max-retries | 1 | Number of retries after writing BE failed | -| sink.batch.interval | 10s | The flush interval, after which the asynchronous thread will write the data in the cache to BE. The default value is 10 second, and the time units are ms, s, min, h, and d. Set to 0 to turn off periodic writing. | -| sink.properties.* | -- | The stream load parameters.

eg:
sink.properties.column_separator' = ','

Setting 'sink.properties.escape_delimiters' = 'true' if you want to use a control char as a separator, so that such as '\\x01' will translate to binary 0x01

Support JSON format import, you need to enable both 'sink.properties.format' ='json' and 'sink.properties.strip_outer_array' ='true'| -| sink.enable-delete | true | Whether to enable deletion. This option requires Doris table to enable batch delete function (0.15+ version is enabled by default), and only supports Unique model.| -| sink.batch.bytes | 10485760 | Maximum bytes of batch in a single write to BE. When the data size in batch exceeds this threshold, cache data is written to BE. The default value is 10MB | - -## Doris & Flink Column Type Mapping - -| Doris Type | Flink Type | -| ---------- | -------------------------------- | -| NULL_TYPE | NULL | -| BOOLEAN | BOOLEAN | -| TINYINT | TINYINT | -| SMALLINT | SMALLINT | -| INT | INT | -| BIGINT | BIGINT | -| FLOAT | FLOAT | -| DOUBLE | DOUBLE | -| DATE | STRING | -| DATETIME | STRING | -| DECIMAL | DECIMAL | -| CHAR | STRING | -| LARGEINT | STRING | -| VARCHAR | STRING | -| DECIMALV2 | DECIMAL | -| TIME | DOUBLE | -| HLL | Unsupported datatype | - -## An example of using Flink CDC to access Doris (supports Insert / Update / Delete events) -```sql -CREATE TABLE cdc_mysql_source ( - id int - ,name VARCHAR - ,PRIMARY KEY (id) NOT ENFORCED -) WITH ( - 'connector' = 'mysql-cdc', - 'hostname' = '127.0.0.1', - 'port' = '3306', - 'username' = 'root', - 'password' = 'password', - 'database-name' = 'database', - 'table-name' = 'table' -); - --- Support delete event synchronization (sink.enable-delete='true'), requires Doris table to enable batch delete function -CREATE TABLE doris_sink ( -id INT, -name STRING -) -WITH ( - 'connector' = 'doris', - 'fenodes' = '127.0.0.1:8030', - 'table.identifier' = 'database.table', - 'username' = 'root', - 'password' = '', - 'sink.properties.format' = 'json', - 'sink.properties.strip_outer_array' = 'true', - 'sink.enable-delete' = 'true' -); - -insert into doris_sink select id,name from cdc_mysql_source; -``` diff --git a/docs/en/extending-doris/hive-bitmap-udf.md b/docs/en/extending-doris/hive-bitmap-udf.md deleted file mode 100644 index 40cb13cf3f..0000000000 --- a/docs/en/extending-doris/hive-bitmap-udf.md +++ /dev/null @@ -1,97 +0,0 @@ ---- -{ - "title": "Hive Bitmap UDF", - "language": "en" -} ---- - - - -# Hive UDF - - Hive Bitmap UDF provides UDFs for generating bitmap and bitmap operations in hive tables. The bitmap in Hive is exactly the same as the Doris bitmap. The bitmap in Hive can be imported into doris through (spark bitmap load). - - the main purpose: - 1. Reduce the time of importing data into doris, and remove processes such as dictionary building and bitmap pre-aggregation; - 2. Save hive storage, use bitmap to compress data, reduce storage cost; - 3. 
Provide flexible bitmap operations in hive, such as: intersection, union, and difference operations, and the calculated bitmap can also be directly imported into doris; imported into doris; - -## How To Use - -### Create Bitmap type table in Hive - -```sql - --- Example: Create Hive Bitmap Table -CREATE TABLE IF NOT EXISTS `hive_bitmap_table`( - `k1` int COMMENT '', - `k2` String COMMENT '', - `k3` String COMMENT '', - `uuid` binary COMMENT 'bitmap' -) comment 'comment' - -``` - -### Hive Bitmap UDF Usage: - - Hive Bitmap UDF used in Hive/Spark - -```sql - --- Load the Hive Bitmap Udf jar package (Upload the compiled hive-udf jar package to HDFS) -add jar hdfs://node:9001/hive-udf-jar-with-dependencies.jar; - --- Create Hive Bitmap UDAF function -create temporary function to_bitmap as 'org.apache.doris.udf.ToBitmapUDAF'; -create temporary function bitmap_union as 'org.apache.doris.udf.BitmapUnionUDAF'; - --- Create Hive Bitmap UDF function -create temporary function bitmap_count as 'org.apache.doris.udf.BitmapCountUDF'; -create temporary function bitmap_and as 'org.apache.doris.udf.BitmapAndUDF'; -create temporary function bitmap_or as 'org.apache.doris.udf.BitmapOrUDF'; -create temporary function bitmap_xor as 'org.apache.doris.udf.BitmapXorUDF'; - --- Example: Generate bitmap by to_bitmap function and write to Hive Bitmap table -insert into hive_bitmap_table -select - k1, - k2, - k3, - to_bitmap(uuid) as uuid -from - hive_table -group by - k1, - k2, - k3 - --- Example: The bitmap_count function calculate the number of elements in the bitmap -select k1,k2,k3,bitmap_count(uuid) from hive_bitmap_table - --- Example: The bitmap_union function calculate the grouped bitmap union -select k1,bitmap_union(uuid) from hive_bitmap_table group by k1 - -``` - -### Hive Bitmap UDF Description - -## Hive Bitmap import into Doris - - see details: Load Data -> Spark Load -> Basic operation -> Create load(Example 3: when the upstream data source is hive binary type table) diff --git a/docs/en/extending-doris/hive-of-doris.md b/docs/en/extending-doris/hive-of-doris.md deleted file mode 100644 index cfa98ab0f4..0000000000 --- a/docs/en/extending-doris/hive-of-doris.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -{ - "title": "Doris On Hive", - "language": "en" -} ---- - - - -# Hive External Table of Doris - -Hive External Table of Doris provides Doris with direct access to Hive external tables, which eliminates the need for cumbersome data import and solves the problem of analyzing Hive tables with the help of Doris' OLAP capabilities: - - 1. support for Hive data sources to access Doris - 2. Support joint queries between Doris and Hive data sources to perform more complex analysis operations - -This document introduces how to use this feature and the considerations. - -## Glossary - -### Noun in Doris - -* FE: Frontend, the front-end node of Doris, responsible for metadata management and request access. -* BE: Backend, the backend node of Doris, responsible for query execution and data storage - -## How To Use - -### Create Hive External Table - -```sql --- Syntax -CREATE [EXTERNAL] TABLE table_name ( - col_name col_type [NULL | NOT NULL] [COMMENT "comment"] -) ENGINE=HIVE -[COMMENT "comment"] ) -PROPERTIES ( - 'property_name'='property_value', - ... 
-); - --- Example: Create the hive_table table under hive_db in a Hive cluster -CREATE TABLE `t_hive` ( - `k1` int NOT NULL COMMENT "", - `k2` char(10) NOT NULL COMMENT "", - `k3` datetime NOT NULL COMMENT "", - `k5` varchar(20) NOT NULL COMMENT "", - `k6` double NOT NULL COMMENT "" -) ENGINE=HIVE -COMMENT "HIVE" -PROPERTIES ( -'hive.metastore.uris' = 'thrift://192.168.0.1:9083', -'database' = 'hive_db', -'table' = 'hive_table' -); -``` - -#### Parameter Description - -- External Table Columns - - Column names should correspond to the Hive table - - The order of the columns should be the same as the Hive table - - Must contain all the columns in the Hive table - - Hive table partition columns do not need to be specified, they can be defined as normal columns. -- ENGINE should be specified as HIVE -- PROPERTIES attribute. - - `hive.metastore.uris`: Hive Metastore service address - - `database`: the name of the database to which Hive is mounted - - `table`: the name of the table to which Hive is mounted - -## Data Type Matching - -The supported Hive column types correspond to Doris in the following table. - -| Hive | Doris | Description | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| CHAR | CHAR | Only UTF8 encoding is supported | -| VARCHAR | VARCHAR | Only UTF8 encoding is supported | -| TINYINT | TINYINT | | -| SMALLINT | SMALLINT | | -| INT | INT | | -| BIGINT | BIGINT | | -| FLOAT | FLOAT | | -| DOUBLE | DOUBLE | | -| DECIMAL | DECIMAL | | -| DATE | DATE | | -| TIMESTAMP | DATETIME | Timestamp to Datetime will lose precision | - -**Note:** -- Hive table Schema changes **are not automatically synchronized** and require rebuilding the Hive external table in Doris. -- The current Hive storage format only supports Text, Parquet and ORC types -- The Hive version currently supported by default is `2.3.7、3.1.2`, which has not been tested in other versions. More versions will be supported in the future. - -### Query Usage - -After you finish building the Hive external table in Doris, it is no different from a normal Doris OLAP table except that you cannot use the data model in Doris (rollup, preaggregation, materialized view, etc.) - -```sql -select * from t_hive where k1 > 1000 and k3 = 'term' or k4 like '%doris'; -``` diff --git a/docs/en/extending-doris/iceberg-of-doris.md b/docs/en/extending-doris/iceberg-of-doris.md deleted file mode 100644 index aa83042feb..0000000000 --- a/docs/en/extending-doris/iceberg-of-doris.md +++ /dev/null @@ -1,211 +0,0 @@ ---- -{ - "title": "Doris On Iceberg", - "language": "en" -} ---- - - - -# Iceberg External Table of Doris - -Iceberg External Table of Doris provides Doris with the ability to access Iceberg external tables directly, eliminating the need for cumbersome data import and leveraging Doris' own OLAP capabilities to solve Iceberg table data analysis problems. - - 1. support Iceberg data sources to access Doris - 2. Support joint query between Doris and Iceberg data source tables to perform more complex analysis operations - -This document introduces how to use this feature and the considerations. - -## Glossary - -### Noun in Doris - -* FE: Frontend, the front-end node of Doris, responsible for metadata management and request access -* BE: Backend, the backend node of Doris, responsible for query execution and data storage - -## How to use - -### Create Iceberg External Table - -Iceberg tables can be created in Doris in two ways. 
You do not need to declare the column definitions of the table when creating an external table, Doris can automatically convert them based on the column definitions of the table in Iceberg. - -1. Create a separate external table to mount the Iceberg table. - The syntax can be viewed in `HELP CREATE TABLE`. - - ```sql - -- Syntax - CREATE [EXTERNAL] TABLE table_name - ENGINE = ICEBERG - [COMMENT "comment"] - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.table" = "icberg_table_name", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - - - -- Example: Mount iceberg_table under iceberg_db in Iceberg - CREATE TABLE `t_iceberg` - ENGINE = ICEBERG - PROPERTIES ( - "iceberg.database" = "iceberg_db", - "iceberg.table" = "iceberg_table", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - -2. Create an Iceberg database to mount the corresponding Iceberg database on the remote side, and mount all the tables under the database. - You can check the syntax with `HELP CREATE DATABASE`. - - ```sql - -- Syntax - CREATE DATABASE db_name - [COMMENT "comment"] - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - - -- Example: mount the iceberg_db in Iceberg and mount all tables under that db - CREATE DATABASE `iceberg_test_db` - PROPERTIES ( - "iceberg.database" = "iceberg_db", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - - The progress of the table build in `iceberg_test_db` can be viewed by `HELP SHOW TABLE CREATION`. - - -You can also create an Iceberg table by explicitly specifying the column definitions according to your needs. - -1. Create an Iceberg table - - ```sql - -- Syntax - CREATE [EXTERNAL] TABLE table_name ( - col_name col_type [NULL | NOT NULL] [COMMENT "comment"] - ) ENGINE = ICEBERG - [COMMENT "comment"] ) - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.table" = "icberg_table_name", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - - -- Example: Mount iceberg_table under iceberg_db in Iceberg - CREATE TABLE `t_iceberg` ( - `id` int NOT NULL COMMENT "id number", - `name` varchar(10) NOT NULL COMMENT "user name" - ) ENGINE = ICEBERG - PROPERTIES ( - "iceberg.database" = "iceberg_db", - "iceberg.table" = "iceberg_table", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - -#### Parameter Description - -- External Table Columns - - Column names should correspond to the Iceberg table - - The order of the columns needs to be consistent with the Iceberg table -- ENGINE needs to be specified as ICEBERG -- PROPERTIES property. - - `iceberg.hive.metastore.uris`: Hive Metastore service address - - `iceberg.database`: the name of the database to which Iceberg is mounted - - `iceberg.table`: the name of the table to which Iceberg is mounted, not required when mounting Iceberg database. - - `iceberg.catalog.type`: the catalog method used in Iceberg, the default is `HIVE_CATALOG`, currently only this method is supported, more Iceberg catalog access methods will be supported in the future. - -### Show table structure - -Show table structure can be viewed by `HELP SHOW CREATE TABLE`. 
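
For example, assuming the `t_iceberg` external table from the examples above has been created, a minimal sketch for inspecting the structure Doris generated from the Iceberg schema might look like the following (the exact output depends on your Doris version and the mounted Iceberg schema, so treat it as illustrative only):

```sql
-- Assumes the t_iceberg external table created in the examples above.
-- Show the CREATE TABLE statement Doris generated from the Iceberg schema.
SHOW CREATE TABLE t_iceberg;

-- DESC lists the converted column definitions in tabular form.
DESC t_iceberg;
```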
- -### Synchronized mounts - -When the Iceberg table Schema changes, you can manually synchronize it with the `REFRESH` command, which will remove and rebuild the Iceberg external table in Doris, as seen in the `HELP REFRESH` help. - -```sql --- Synchronize the Iceberg table -REFRESH TABLE t_iceberg; - --- Synchronize the Iceberg database -REFRESH DATABASE iceberg_test_db; -``` - -## Data Type Matching - -The supported Iceberg column types correspond to Doris in the following table. - -| Iceberg | Doris | Description | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| INTEGER | INT | | -| LONG | BIGINT | | -| FLOAT | FLOAT | | -| DOUBLE | DOUBLE | | -| DATE | DATE | | -| TIMESTAMP | DATETIME | Timestamp to Datetime with loss of precision | -| STRING | STRING | | -| UUID | VARCHAR | Use VARCHAR instead | -| DECIMAL | DECIMAL | | -| TIME | - | not supported | -| FIXED | - | not supported | -| BINARY | - | not supported | -| STRUCT | - | not supported | -| LIST | - | not supported | -| MAP | - | not supported | - -**Note:** -- Iceberg table Schema changes **are not automatically synchronized** and require synchronization of Iceberg external tables or databases in Doris via the `REFRESH` command. -- The current default supported version of Iceberg is 0.12.0 and has not been tested in other versions. More versions will be supported in the future. - -### Query Usage - -Once you have finished building the Iceberg external table in Doris, it is no different from a normal Doris OLAP table except that you cannot use the data models in Doris (rollup, preaggregation, materialized views, etc.) - -```sql -select * from t_iceberg where k1 > 1000 and k3 = 'term' or k4 like '%doris'; -``` - -## Related system configurations - -### FE Configuration - -The following configurations are at the Iceberg external table system level and can be configured by modifying `fe.conf` or by `ADMIN SET CONFIG`. - -- `iceberg_table_creation_strict_mode` - - Iceberg tables are created with strict mode enabled by default. - strict mode means that the column types of the Iceberg table are strictly filtered, and if there are data types that Doris does not currently support, the creation of the table will fail. - -- `iceberg_table_creation_interval_second` - - The background task execution interval for automatic creation of Iceberg tables, default is 10s. - -- `max_iceberg_table_creation_record_size` - - The maximum value reserved for Iceberg table creation records, default is 2000. Only for creating Iceberg database records. diff --git a/docs/en/extending-doris/logstash.md b/docs/en/extending-doris/logstash.md deleted file mode 100644 index 96dbfdb73d..0000000000 --- a/docs/en/extending-doris/logstash.md +++ /dev/null @@ -1,198 +0,0 @@ ---- -{ - "title": "Logstash Doris Output Plugin", - "language": "en" -} ---- - - - -# Doris output plugin - -This plugin is used to output data to Doris for logstash, use the HTTP protocol to interact with the Doris FE Http interface, and import data through Doris's stream load. 
- -[Learn more about Doris Stream Load ](http://doris.apache.org/administrator-guide/load-data/stream-load-manual.html) - -[Learn more about Doris](http://doris.apache.org) - - -## Install and compile -### 1.Download source code - -### 2.compile ## -Execute under extension/logstash/ directory - -`gem build logstash-output-doris.gemspec` - -You will get logstash-output-doris-{version}.gem file in the same directory - -### 3.Plug-in installation -copy logstash-output-doris-{version}.gem to the logstash installation directory - -Executing an order - -`./bin/logstash-plugin install logstash-output-doris-{version}.gem` - -Install logstash-output-doris plugin - -## Configuration -### Example: - -Create a new configuration file in the config directory and name it logstash-doris.conf - -The specific configuration is as follows: - - output { - doris { - http_hosts => [ "http://fehost:8030" ] - user => user_name - password => password - db => "db_name" - table => "table_name" - label_prefix => "label_prefix" - column_separator => "," - } - } - -Configuration instructions: - -Connection configuration: - -Configuration | Explanation ---- | --- -`http_hosts` | FE's HTTP interactive address eg | ["http://fe1:8030", "http://fe2:8030"] -`user` | User name, the user needs to have import permission for the doris table -`password` | Password -`db` | Database name -`table` | Table name -`label_prefix` | Import the identification prefix, the final generated ID is *{label\_prefix}\_{db}\_{table}\_{time_stamp}* - - -Load configuration:([Reference documents](http://doris.apache.org/master/zh-CN/administrator-guide/load-data/stream-load-manual.html)) - -Configuration | Explanation ---- | --- -`column_separator` | Column separator, the default is \t -`columns` | Used to specify the correspondence between the columns in the import file and the columns in the table -`where` | The filter conditions specified by the import task -`max_filter_ratio` | The maximum tolerance rate of the import task, the default is zero tolerance -`partition` | Partition information of the table to be imported -`timeout` | timeout, the default is 600s -`strict_mode` | Strict mode, the default is false -`timezone` | Specify the time zone used for this import, the default is the East Eight District -`exec_mem_limit` | Import memory limit, default is 2GB, unit is byte - -Other configuration: - -Configuration | Explanation ---- | --- -`save_on_failure` | If the import fails to save locally, the default is true -`save_dir` | Local save directory, default is /tmp -`automatic_retries` | The maximum number of retries on failure, the default is 3 -`batch_size` | The maximum number of events processed per batch, the default is 100000 -`idle_flush_time` | Maximum interval, the default is 20 (seconds) - - -## Start Up -Run the command to start the doris output plugin: - -`{logstash-home}/bin/logstash -f {logstash-home}/config/logstash-doris.conf --config.reload.automatic` - - - - -## Complete usage example -### 1. Compile doris-output-plugin -1> Download the ruby compressed package and go to [ruby official website](https://www.ruby-lang.org/en/downloads/) to download it. The version 2.7.1 used here - -2> Compile and install, configure ruby environment variables - -3> Go to the doris source extension/logstash/ directory and execute - -`gem build logstash-output-doris.gemspec` - -Get the file logstash-output-doris-0.1.0.gem, and the compilation is complete - -### 2. 
Install and configure filebeat (here use filebeat as input) - -1> [es official website](https://www.elastic.co/) Download the filebeat tar compression package and decompress it - -2> Enter the filebeat directory and modify the configuration file filebeat.yml as follows: - - filebeat.inputs: - - type: log - paths: - - /tmp/doris.data - output.logstash: - hosts: ["localhost:5044"] - -/tmp/doris.data is the doris data path - -3> Start filebeat: - -`./filebeat -e -c filebeat.yml -d "publish"` - - -### 3.Install logstash and doris-out-plugin -1> [es official website](https://www.elastic.co/) Download the logstash tar compressed package and decompress it - -2> Copy the logstash-output-doris-0.1.0.gem obtained in step 1 to the logstash installation directory - -3> execute - -`./bin/logstash-plugin install logstash-output-doris-0.1.0.gem` - -Install the plugin - -4> Create a new configuration file logstash-doris.conf in the config directory as follows: - - input { - beats { - port => "5044" - } - } - - output { - doris { - http_hosts => [ "http://127.0.0.1:8030" ] - user => doris - password => doris - db => "logstash_output_test" - table => "output" - label_prefix => "doris" - column_separator => "," - columns => "a,b,c,d,e" - } - } - -The configuration here needs to be configured according to the configuration instructions - -5> Start logstash: - -./bin/logstash -f ./config/logstash-doris.conf --config.reload.automatic - -### 4.Test Load - -Add write data to /tmp/doris.data - -`echo a,b,c,d,e >> /tmp/doris.data` - -Observe the logstash log. If the status of the returned response is Success, the import was successful. At this time, you can view the imported data in the logstash_output_test.output table - diff --git a/docs/en/extending-doris/odbc-of-doris.md b/docs/en/extending-doris/odbc-of-doris.md deleted file mode 100644 index fe8915c47d..0000000000 --- a/docs/en/extending-doris/odbc-of-doris.md +++ /dev/null @@ -1,374 +0,0 @@ ---- -{ - "title": "Doris On ODBC", - "language": "en" -} ---- - - - - -# ODBC External Table Of Doris - -ODBC external table of Doris provides Doris access to external tables through the standard interface for database access (ODBC). The external table eliminates the tedious data import work and enables Doris to have the ability to access all kinds of databases. It solves the data analysis problem of external tables with Doris' OLAP capability. - -1. Support various data sources to access Doris -2. Support Doris query with tables in various data sources to perform more complex analysis operations -3. Use insert into to write the query results executed by Doris to the external data source - - -This document mainly introduces the implementation principle and usage of this ODBC external table. - -## Glossary - -### Noun in Doris - -* FE: Frontend, the front-end node of Doris. Responsible for metadata management and request access. -* BE: Backend, Doris's back-end node. Responsible for query execution and data storage. - -## How To Use - -### Create ODBC External Table - -#### 1. 
Creating ODBC external table without resource - -``` -CREATE EXTERNAL TABLE `baseall_oracle` ( - `k1` decimal(9, 3) NOT NULL COMMENT "", - `k2` char(10) NOT NULL COMMENT "", - `k3` datetime NOT NULL COMMENT "", - `k5` varchar(20) NOT NULL COMMENT "", - `k6` double NOT NULL COMMENT "" -) ENGINE=ODBC -COMMENT "ODBC" -PROPERTIES ( -"host" = "192.168.0.1", -"port" = "8086", -"user" = "test", -"password" = "test", -"database" = "test", -"table" = "baseall", -"driver" = "Oracle 19 ODBC driver", -"type" = "oracle" -); -``` - -#### 2. Creating ODBC external table by resource (recommended) -``` -CREATE EXTERNAL RESOURCE `oracle_odbc` -PROPERTIES ( -"type" = "odbc_catalog", -"host" = "192.168.0.1", -"port" = "8086", -"user" = "test", -"password" = "test", -"database" = "test", -"odbc_type" = "oracle", -"driver" = "Oracle 19 ODBC driver" -); - -CREATE EXTERNAL TABLE `baseall_oracle` ( - `k1` decimal(9, 3) NOT NULL COMMENT "", - `k2` char(10) NOT NULL COMMENT "", - `k3` datetime NOT NULL COMMENT "", - `k5` varchar(20) NOT NULL COMMENT "", - `k6` double NOT NULL COMMENT "" -) ENGINE=ODBC -COMMENT "ODBC" -PROPERTIES ( -"odbc_catalog_resource" = "oracle_odbc", -"database" = "test", -"table" = "baseall" -); -``` - -The following parameters are accepted by ODBC external table: - -Parameter | Description ----|--- -**hosts** | IP address of external database -**driver** | The driver name of ODBC Driver, which needs to be/conf/odbcinst.ini. The driver names should be consistent. -**type** | The type of external database, currently supports Oracle, MySQL and PostgerSQL -**user** | The user name of database -**password** | password for the user - - -##### Installation and configuration of ODBC driver - - -Each database will provide ODBC access driver. Users can install the corresponding ODBC driver lib library according to the official recommendation of each database. - -After installation of ODBC driver, find the path of the driver lib Library of the corresponding database. The modify be/conf/odbcinst.ini Configuration like: - -``` -[MySQL Driver] -Description = ODBC for MySQL -Driver = /usr/lib64/libmyodbc8w.so -FileUsage = 1 -``` -* `[]`: The corresponding driver name in is the driver name. When creating an external table, the driver name of the external table should be consistent with that in the configuration file. -* `Driver=`: This should be setted in according to the actual be installation path of the driver. It is essentially the path of a dynamic library. Here, we need to ensure that the pre dependencies of the dynamic library are met. - -**Remember, all BE nodes are required to have the same driver installed, the same installation path and the same be/conf/odbcinst.ini config.** - - -### Query usage - -After the ODBC external table is create in Doris, it is no different from ordinary Doris tables except that the data model (rollup, pre aggregation, materialized view, etc.) in Doris cannot be used. - -``` -select * from oracle_table where k1 > 1000 and k3 ='term' or k4 like '%doris' -``` - -### Data write - -After the ODBC external table is create in Doris, the data can be written directly by the `insert into` statement, the query results of Doris can be written to the ODBC external table, or the data can be imported from one ODBC table to another. - -``` -insert into oracle_table values(1, "doris"); -insert into oracle_table select * from postgre_table; -``` -#### Transaction - - -The data of Doris is written to the external table by a group of batch. 
If the import is interrupted, the data written before may need to be rolled back. Therefore, the ODBC external table supports transactions when data is written. Transaction support needs to be supported set by session variable: `enable_odbc_transcation`. - -``` -set enable_odbc_transcation = true; -``` - -Transactions ensure the atomicity of ODBC external table writing, but it will reduce the performance of data writing ., so we can consider turning on the way as appropriate. - -## Database ODBC version correspondence - -### Centos Operating System - -The unixODBC versions used are: 2.3.1, Doris 0.15, centos 7.9, all of which are installed using yum. - -#### 1.mysql - -| Mysql version | Mysql ODBC version | -| ------------- | ------------------ | -| 8.0.27 | 8.0.27, 8.026 | -| 5.7.36 | 5.3.11, 5.3.13 | -| 5.6.51 | 5.3.11, 5.3.13 | -| 5.5.62 | 5.3.11, 5.3.13 | - -#### 2. PostgreSQL - -PostgreSQL's yum source rpm package address: - -```` -https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm -```` - -This contains all versions of PostgreSQL from 9.x to 14.x, including the corresponding ODBC version, which can be installed as needed. - -| PostgreSQL Version | PostgreSQL ODBC Version | -| ------------------ | ---------------------------- | -| 12.9 | postgresql12-odbc-13.02.0000 | -| 13.5 | postgresql13-odbc-13.02.0000 | -| 14.1 | postgresql14-odbc-13.02.0000 | -| 9.6.24 | postgresql96-odbc-13.02.0000 | -| 10.6 | postgresql10-odbc-13.02.0000 | -| 11.6 | postgresql11-odbc-13.02.0000 | - -#### 3. Oracle - -#### - -| Oracle版本 | Oracle ODBC版本 | -| ------------------------------------------------------------ | ------------------------------------------ | -| Oracle Database 11g Enterprise Edition Release 11.2.0.1.0 - 64bit Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | -| Oracle Database 12c Standard Edition Release 12.2.0.1.0 - 64bit Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | -| Oracle Database 18c Enterprise Edition Release 18.0.0.0.0 - Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | -| Oracle Database 19c Enterprise Edition Release 19.0.0.0.0 - Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | -| Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | - -Oracle ODBC driver version download address: - -``` -https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-sqlplus-19.13.0.0.0-2.x86_64.rpm -https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-devel-19.13.0.0.0-2.x86_64.rpm -https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-odbc-19.13.0.0.0-2.x86_64.rpm -https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-basic-19.13.0.0.0-2.x86_64.rpm -``` - -## Ubuntu operating system - -The unixODBC versions used are: 2.3.4, Doris 0.15, Ubuntu 20.04 - -#### 1. Mysql - -| Mysql version | Mysql ODBC version | -| ------------- | ------------------ | -| 8.0.27 | 8.0.11, 5.3.13 | - -Currently only tested this version, other versions will be added after testing - -#### 2. PostgreSQL - -| PostgreSQL Version | PostgreSQL ODBC Version | -| ------------------ | ----------------------- | -| 12.9 | psqlodbc-12.02.0000 | - -For other versions, as long as you download the ODBC driver version that matches the major version of the database, there is no problem. 
This will continue to supplement the test results of other versions under the Ubuntu system. - -#### 3. Oracle - -The same as the Oracle database and ODBC correspondence of the Centos operating system, and the following method is used to install the rpm package under ubuntu. - -In order to install rpm packages under ubuntu, we also need to install an alien, which is a tool that can convert rpm packages into deb installation packages - -```` -sudo apt-get install alien -```` - -Then execute the installation of the above four packages - -```` -sudo alien -i oracle-instantclient19.13-basic-19.13.0.0.0-2.x86_64.rpm -sudo alien -i oracle-instantclient19.13-devel-19.13.0.0.0-2.x86_64.rpm -sudo alien -i oracle-instantclient19.13-odbc-19.13.0.0.0-2.x86_64.rpm -sudo alien -i oracle-instantclient19.13-sqlplus-19.13.0.0.0-2.x86_64.rpm -```` - -## Data type mapping - -There are different data types among different databases. Here, the types in each database and the data type matching in Doris are listed. - -### MySQL - -| MySQL | Doris | Alternation rules | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| CHAR | CHAR | Only UTF8 encoding is supported | -| VARCHAR | VARCHAR | Only UTF8 encoding is supported | -| DATE | DATE | | -| FLOAT | FLOAT | | -| TINYINT | TINYINT | | -| SMALLINT | SMALLINT | | -| INT | INT | | -| BIGINT | BIGINT | | -| DOUBLE | DOUBLE | | -| DATE | DATE | | -| DATETIME | DATETIME | | -| DECIMAL | DECIMAL | | - -### PostgreSQL - -| PostgreSQL | Doris | Alternation rules | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| CHAR | CHAR | Only UTF8 encoding is supported | -| VARCHAR | VARCHAR | Only UTF8 encoding is supported -| DATE | DATE | | -| REAL | FLOAT | | -| SMALLINT | SMALLINT | | -| INT | INT | | -| BIGINT | BIGINT | | -| DOUBLE | DOUBLE | | -| TIMESTAMP | DATETIME | | -| DECIMAL | DECIMAL | | - -### Oracle - -| Oracle | Doris | Alternation rules | -| :------: | :----: | :-------------------------------: | -| not support | BOOLEAN | Oracle can replace Boolean with number (1) | -| CHAR | CHAR | | -| VARCHAR | VARCHAR | | -| DATE | DATE | | -| FLOAT | FLOAT | | -| not support | TINYINT | Oracle can be replaced by NUMBER | -| SMALLINT | SMALLINT | | -| INT | INT | | -| not support | BIGINT | Oracle can be replaced by NUMBER | -| not support | DOUBLE | Oracle can be replaced by NUMBER | -| DATE | DATE | | -| DATETIME | DATETIME | | -| NUMBER | DECIMAL | | - -### SQLServer - -| SQLServer | Doris | Alternation rules | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| CHAR | CHAR | Only UTF8 encoding is supported | -| VARCHAR | VARCHAR | Only UTF8 encoding is supported | -| DATE/ | DATE | | -| REAL | FLOAT | | -| TINYINT | TINYINT | | -| SMALLINT | SMALLINT | | -| INT | INT | | -| BIGINT | BIGINT | | -| FLOAT | DOUBLE | | -| DATETIME/DATETIME2 | DATETIME | | -| DECIMAL/NUMERIC | DECIMAL | | - -## Q&A - -1. Relationship with the original external table of MySQL? - -After accessing the ODBC external table, the original way to access the MySQL external table will be gradually abandoned. If you have not used the MySQL external table before, it is recommended that the newly accessed MySQL tables use ODBC external table directly. - -2. Besides MySQL, Oracle, SQLServer, PostgreSQL, can doris support more databases? - -Currently, Doris only adapts to MySQL, Oracle, SQLServer, PostgreSQL. The adaptation of other databases is under planning. 
In principle, any database that supports ODBC access can be accessed through the ODBC external table. If you need to access other databases, you are welcome to modify the code and contribute to Doris.

3. When is it appropriate to use ODBC external tables?

   Generally, ODBC external tables are appropriate when the amount of external data is small (less than about 1 million rows). Because an external table cannot benefit from Doris's storage engine and introduces additional network overhead, it is recommended to decide whether to query through external tables or import the data into Doris based on the actual latency requirements of your queries.

4. Garbled characters when accessing Oracle?

   Add the following parameter to the BE startup script: `export NLS_LANG=AMERICAN_AMERICA.AL32UTF8`, then restart all BE nodes.

5. ANSI Driver or Unicode Driver?

   ODBC currently supports both ANSI and Unicode driver forms, while Doris only supports the Unicode driver. If you force the use of an ANSI driver, the query results may be wrong.

6. Error: `driver connect Err: 01000 [unixODBC][Driver Manager]Can't open lib 'Xxx' : file not found (0)`

   The driver for the corresponding database is not installed on every BE, the correct driver path is not configured in be/conf/odbcinst.ini, or the driver name used when creating the table does not match the one in be/conf/odbcinst.ini.

7. Error: `Fail to convert odbc value 'PALO ' TO INT on column:'A'`

   This is a type conversion error: the mapped type of column `A` does not match the actual column type in the external database and needs to be modified.

8. BE crashes when the old MySQL external table and the ODBC external driver are used at the same time

   This is a compatibility problem between the MySQL ODBC driver and the MySQL library that Doris already depends on. The recommended solutions are as follows:

   * Method 1: Replace the old MySQL external tables with ODBC external tables and recompile BE with the **WITH_MySQL** option disabled.
   * Method 2: Do not use the latest 8.x MySQL ODBC driver; use the 5.x MySQL ODBC driver instead.

9. Pushdown of filter conditions

   ODBC external tables currently support pushing down filter conditions. MySQL external tables support pushdown of all conditions. Functions in other databases differ from those in Doris, which can cause pushed-down queries to fail; at present, function calls are not pushed down for databases other than MySQL. Whether Doris pushes down the required filter conditions can be confirmed with the `explain` statement.

10. Error: `driver connect Err: xxx`

    The connection to the database failed. The `Err:` part describes the specific connection failure for the database in question. This is usually a configuration problem: check whether the IP address, port, or account and password are correct.

diff --git a/docs/en/extending-doris/seatunnel/flink-sink.md b/docs/en/extending-doris/seatunnel/flink-sink.md
deleted file mode 100644
index e8d09329d9..0000000000
--- a/docs/en/extending-doris/seatunnel/flink-sink.md
+++ /dev/null
@@ -1,116 +0,0 @@
---
{
    "title": "Seatunnel Connector Flink Doris",
    "language": "en"
}
---

# Seatunnel
The newest [Apache SeaTunnel (formerly waterdrop)](https://seatunnel.apache.org) already supports Doris's connector. SeaTunnel can use the Spark engine and the Flink engine to synchronize data to Doris.
-## Flink Sink Doris(2.x) -Seatunnel Flink Sink Doris [plugin code](https://github.com/apache/incubator-seatunnel/tree/dev/seatunnel-connectors/seatunnel-connector-flink-doris) - -### Options -| name | type | required | default value | engine | -| --- | --- | --- | --- | --- | -| fenodes | string | yes | - | Flink | -| database | string | yes | - | Flink | -| table | string | yes | - | Flink | -| user | string | yes | - | Flink | -| password | string | yes | - | Flink | -| batch_size | int | no | 100 | Flink | -| interval | int | no |1000 | Flink | -| max_retries | int | no | 1 | Flink| -| doris.* | - | no | - | Flink | - -`fenodes [string]` - -Doris Fe http url, eg: 127.0.0.1:8030 - -`database [string]` - -Doris database - -`table [string]` - -Doris table - -`user [string]` - -Doris user - -`password [string]` - -Doris password - -`batch_size [int]` - -The maximum number of lines to write to Doris at a time, the default value is 100 - -`interval [int]` - -The flush interval (in milliseconds), after which the asynchronous thread writes the data in the cache to Doris. Set to 0 to turn off periodic writes. - -`max_retries [int]` - -Number of retries after writing to Doris fails - -`doris.* [string]` - -Import parameters for Stream load. For example: 'doris.column_separator' = ', ' etc. - -[More Stream Load parameter configuration](https://doris.apache.org/administrator-guide/load-data/stream-load-manual.html) - -### Examples -Socket To Doris -``` -env { - execution.parallelism = 1 -} -source { - SocketStream { - host = 127.0.0.1 - port = 9999 - result_table_name = "socket" - field_name = "info" - } -} -transform { -} -sink { - DorisSink { - fenodes = "127.0.0.1:8030" - user = root - password = 123456 - database = test - table = test_tbl - batch_size = 5 - max_retries = 1 - interval = 5000 - } -} - -``` -### Start command -``` -sh bin/start-seatunnel-flink.sh --config config/flink.streaming.conf -``` \ No newline at end of file diff --git a/docs/en/extending-doris/seatunnel/spark-sink.md b/docs/en/extending-doris/seatunnel/spark-sink.md deleted file mode 100644 index 5ef316e1dd..0000000000 --- a/docs/en/extending-doris/seatunnel/spark-sink.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -{ - "title": "Seatunnel Connector Spark Doris", - "language": "en" -} ---- - - - -# Seatunnel - -The newest [Apache SeaTunnel (waterdop) ](https://seatunnel.apache.org) has supported Doris connector, -seatunnel can load data by Spark engine or Flink engine. - -In fact,seatunnel load data by stream load function.Everyone is welcome to use - -# Install Seatunnel -[Seatunnel install](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v2/flink/installation) - -## Spark Sink Doris -### Options -| name | type | required | default value | engine | -| --- | --- | --- | --- | --- | -| fenodes | string | yes | - | Spark | -| database | string | yes | - | Spark | -| table | string | yes | - | Spark | -| user | string | yes | - | Spark | -| password | string | yes | - | Spark | -| batch_size | int | yes | 100 | Spark | -| doris.* | string | no | - | Spark | - -`fenodes [string]` - -Doris FE address:8030 - -`database [string]` - -Doris target database name - -`table [string]` - -Doris target table name - -`user [string]` - -Doris user name - -`password [string]` - -Doris user's password - -`batch_size [string]` - -Doris number of submissions per batch - -`doris. [string]` -Doris stream_load properties,you can use 'doris.' 
prefix + stream_load properties - -[More Doris stream_load Configurations](https://doris.apache.org/master/zh-CN/administrator-guide/load-data/stream-load-manual.html) - -### Examples -Hive to Doris - -Config properties -``` -env{ - spark.app.name = "hive2doris-template" -} - -spark { - spark.sql.catalogImplementation = "hive" -} - -source { - hive { - preSql = "select * from tmp.test" - result_table_name = "test" - } -} - -transform { -} - - -sink { - -Console { - - } - -Doris { - fenodes="xxxx:8030" - database="gl_mint_dim" - table="dim_date" - user="root" - password="root" - batch_size=1000 - doris.column_separator="\t" - doris.columns="date_key,date_value,day_in_year,day_in_month" - } -} -``` -Start command -``` -sh bin/start-waterdrop-spark.sh --master local[4] --deploy-mode client --config ./config/spark.conf -``` \ No newline at end of file diff --git a/docs/en/extending-doris/spark-doris-connector.md b/docs/en/extending-doris/spark-doris-connector.md deleted file mode 100644 index b7145654c0..0000000000 --- a/docs/en/extending-doris/spark-doris-connector.md +++ /dev/null @@ -1,286 +0,0 @@ ---- -{ - "title": "Spark Doris Connector", - "language": "en" -} ---- - - - -# Spark Doris Connector - -Spark Doris Connector can support reading data stored in Doris and writing data to Doris through Spark. - -Github: https://github.com/apache/incubator-doris-spark-connector - -- Support reading data from `Doris`. -- Support `Spark DataFrame` batch/stream writing data to `Doris` -- You can map the `Doris` table to` DataFrame` or `RDD`, it is recommended to use` DataFrame`. -- Support the completion of data filtering on the `Doris` side to reduce the amount of data transmission. - -## Version Compatibility - -| Connector | Spark | Doris | Java | Scala | -|---------------| ----- | ------ | ---- | ----- | -| 2.3.4-2.11.xx | 2.x | 0.12+ | 8 | 2.11 | -| 3.1.2-2.12.xx | 3.x | 0.12.+ | 8 | 2.12 | - -## Build and Install - -Ready to work - -1.Modify the `custom_env.sh.tpl` file and rename it to `custom_env.sh` - -2.Specify the thrift installation directory - -```bash -##source file content -#export THRIFT_BIN= -#export MVN_BIN= -#export JAVA_HOME= - -##amend as below,MacOS as an example -export THRIFT_BIN=/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift -#export MVN_BIN= -#export JAVA_HOME= - -Install `thrift` 0.13.0 (Note: `Doris` 0.15 and the latest builds are based on `thrift` 0.13.0, previous versions are still built with `thrift` 0.9.3) -Windows: - 1. Download: `http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe` - 2. Modify thrift-0.13.0.exe to thrift - -MacOS: - 1. Download: `brew install thrift@0.13.0` - 2. default address: /opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift - -Note: Executing `brew install thrift@0.13.0` on MacOS may report an error that the version cannot be found. The solution is as follows, execute it in the terminal: - 1. `brew tap-new $USER/local-tap` - 2. `brew extract --version='0.13.0' thrift $USER/local-tap` - 3. 
`brew install thrift@0.13.0` - Reference link: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c` - -Linux: - 1.Download source package: `wget https://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz` - 2.Install dependencies: `yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++` - 3.`tar zxvf thrift-0.13.0.tar.gz` - 4.`cd thrift-0.13.0` - 5.`./configure --without-tests` - 6.`make` - 7.`make install` - Check the version after installation is complete: thrift --version - Note: If you have compiled Doris, you do not need to install thrift, you can directly use $DORIS_HOME/thirdparty/installed/bin/thrift -``` - -Execute following command in source dir - -```bash -sh build.sh 2.3.4 2.11 ## spark 2.3.4 version, and scala 2.11 -sh build.sh 3.1.2 2.12 ## spark 3.1.2 version, and scala 2.12 -``` -> Note: If you check out the source code from tag, you can just run sh build.sh --tag without specifying the spark and scala versions. This is because the version in the tag source code is fixed. - -After successful compilation, the file `doris-spark-2.3.4-2.11-1.0.0-SNAPSHOT.jar` will be generated in the `output/` directory. Copy this file to `ClassPath` in `Spark` to use `Spark-Doris-Connector`. For example, `Spark` running in `Local` mode, put this file in the `jars/` folder. `Spark` running in `Yarn` cluster mode, put this file in the pre-deployment package. - -## Using Maven - -``` - - org.apache.doris - spark-doris-connector-3.1_2.12 - - 1.0.1 - -``` - -**Notes** - -Please replace the Connector version according to the different Spark and Scala versions. - -## Example -### Read - -#### SQL - -```sql -CREATE TEMPORARY VIEW spark_doris -USING doris -OPTIONS( - "table.identifier"="$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME", - "fenodes"="$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT", - "user"="$YOUR_DORIS_USERNAME", - "password"="$YOUR_DORIS_PASSWORD" -); - -SELECT * FROM spark_doris; -``` - -#### DataFrame - -```scala -val dorisSparkDF = spark.read.format("doris") - .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME") - .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT") - .option("user", "$YOUR_DORIS_USERNAME") - .option("password", "$YOUR_DORIS_PASSWORD") - .load() - -dorisSparkDF.show(5) -``` - -#### RDD - -```scala -import org.apache.doris.spark._ -val dorisSparkRDD = sc.dorisRDD( - tableIdentifier = Some("$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME"), - cfg = Some(Map( - "doris.fenodes" -> "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT", - "doris.request.auth.user" -> "$YOUR_DORIS_USERNAME", - "doris.request.auth.password" -> "$YOUR_DORIS_PASSWORD" - )) -) - -dorisSparkRDD.collect() -``` -### Write - -#### SQL - -```sql -CREATE TEMPORARY VIEW spark_doris -USING doris -OPTIONS( - "table.identifier"="$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME", - "fenodes"="$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT", - "user"="$YOUR_DORIS_USERNAME", - "password"="$YOUR_DORIS_PASSWORD" -); - -INSERT INTO spark_doris VALUES ("VALUE1","VALUE2",...); -# or -INSERT INTO spark_doris SELECT * FROM YOUR_TABLE -``` - -#### DataFrame(batch/stream) -```scala -## batch sink -val mockDataDF = List( - (3, "440403001005", "21.cn"), - (1, "4404030013005", "22.cn"), - (33, null, "23.cn") -).toDF("id", "mi_code", "mi_name") -mockDataDF.show(5) - -mockDataDF.write.format("doris") - .option("doris.table.identifier", 
"$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME") - .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT") - .option("user", "$YOUR_DORIS_USERNAME") - .option("password", "$YOUR_DORIS_PASSWORD") - //other options - //specify the fields to write - .option("doris.write.fields","$YOUR_FIELDS_TO_WRITE") - .save() - -## stream sink(StructuredStreaming) -val kafkaSource = spark.readStream - .option("kafka.bootstrap.servers", "$YOUR_KAFKA_SERVERS") - .option("startingOffsets", "latest") - .option("subscribe", "$YOUR_KAFKA_TOPICS") - .format("kafka") - .load() -kafkaSource.selectExpr("CAST(key AS STRING)", "CAST(value as STRING)") - .writeStream - .format("doris") - .option("checkpointLocation", "$YOUR_CHECKPOINT_LOCATION") - .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME") - .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT") - .option("user", "$YOUR_DORIS_USERNAME") - .option("password", "$YOUR_DORIS_PASSWORD") - //other options - //specify the fields to write - .option("doris.write.fields","$YOUR_FIELDS_TO_WRITE") - .start() - .awaitTermination() -``` - -## Configuration - -### General - -| Key | Default Value | Comment | -| -------------------------------- | ----------------- | ------------------------------------------------------------ | -| doris.fenodes | -- | Doris FE http address, support multiple addresses, separated by commas | -| doris.table.identifier | -- | Doris table identifier, eg, db1.tbl1 | -| doris.request.retries | 3 | Number of retries to send requests to Doris | -| doris.request.connect.timeout.ms | 30000 | Connection timeout for sending requests to Doris | -| doris.request.read.timeout.ms | 30000 | Read timeout for sending request to Doris | -| doris.request.query.timeout.s | 3600 | Query the timeout time of doris, the default is 1 hour, -1 means no timeout limit | -| doris.request.tablet.size | Integer.MAX_VALUE | The number of Doris Tablets corresponding to an RDD Partition. The smaller this value is set, the more partitions will be generated. This will increase the parallelism on the Spark side, but at the same time will cause greater pressure on Doris. | -| doris.batch.size | 1024 | The maximum number of rows to read data from BE at one time. Increasing this value can reduce the number of connections between Spark and Doris. Thereby reducing the extra time overhead caused by network delay. | -| doris.exec.mem.limit | 2147483648 | Memory limit for a single query. The default is 2GB, in bytes. | -| doris.deserialize.arrow.async | false | Whether to support asynchronous conversion of Arrow format to RowBatch required for spark-doris-connector iteration | -| doris.deserialize.queue.size | 64 | Asynchronous conversion of the internal processing queue in Arrow format takes effect when doris.deserialize.arrow.async is true | -| doris.write.fields | -- | Specifies the fields (or the order of the fields) to write to the Doris table, fileds separated by commas.
By default, all fields are written in the order of Doris table fields. | -| sink.batch.size | 10000 | Maximum number of lines in a single write BE | -| sink.max-retries | 1 | Number of retries after writing BE failed | - -### SQL & Dataframe Configuration - -| Key | Default Value | Comment | -| ------------------------------- | ------------- | ------------------------------------------------------------ | -| user | -- | Doris username | -| password | -- | Doris password | -| doris.filter.query.in.max.count | 100 | In the predicate pushdown, the maximum number of elements in the in expression value list. If this number is exceeded, the in-expression conditional filtering is processed on the Spark side. | - -### RDD Configuration - -| Key | Default Value | Comment | -| --------------------------- | ------------- | ------------------------------------------------------------ | -| doris.request.auth.user | -- | Doris username | -| doris.request.auth.password | -- | Doris password | -| doris.read.field | -- | List of column names in the Doris table, separated by commas | -| doris.filter.query | -- | Filter expression of the query, which is transparently transmitted to Doris. Doris uses this expression to complete source-side data filtering. | - - - -## Doris & Spark Column Type Mapping - -| Doris Type | Spark Type | -| ---------- | -------------------------------- | -| NULL_TYPE | DataTypes.NullType | -| BOOLEAN | DataTypes.BooleanType | -| TINYINT | DataTypes.ByteType | -| SMALLINT | DataTypes.ShortType | -| INT | DataTypes.IntegerType | -| BIGINT | DataTypes.LongType | -| FLOAT | DataTypes.FloatType | -| DOUBLE | DataTypes.DoubleType | -| DATE | DataTypes.StringType1 | -| DATETIME | DataTypes.StringType1 | -| BINARY | DataTypes.BinaryType | -| DECIMAL | DecimalType | -| CHAR | DataTypes.StringType | -| LARGEINT | DataTypes.StringType | -| VARCHAR | DataTypes.StringType | -| DECIMALV2 | DecimalType | -| TIME | DataTypes.DoubleType | -| HLL | Unsupported datatype | - -* Note: In Connector, `DATE` and` DATETIME` are mapped to `String`. Due to the processing logic of the Doris underlying storage engine, when the time type is used directly, the time range covered cannot meet the demand. So use `String` type to directly return the corresponding time readable text. diff --git a/docs/en/extending-doris/udf/contribute-udf.md b/docs/en/extending-doris/udf/contribute-udf.md deleted file mode 100644 index 16356a7f65..0000000000 --- a/docs/en/extending-doris/udf/contribute-udf.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -{ - "title": "Contribute UDF", - "language": "en" -} ---- - - - -# Contribute UDF - -This manual mainly introduces how external users can contribute their own UDF functions to the Doris community. - -## Prerequisites - -1. UDF function is universal - - The versatility here mainly refers to: UDF functions are widely used in certain business scenarios. Such UDF functions are valuable and can be used directly by other users in the community. - - If you are not sure whether the UDF function you wrote is universal, you can send an email to `dev@doris.apache.org` or directly create an ISSUE to initiate the discussion. - -2. UDF has completed testing and is running normally in the user's production environment - -## Ready to work - -1. UDF source code -2. User Manual of UDF - -### Source code - -Create a folder for UDF functions under `contrib/udf/src/`, and store the source code and CMAKE files here. The source code to be contributed should include: `.h`, `.cpp`, `CMakeFile.txt`. 
Taking udf_samples as an example here, first create a new folder under the `contrib/udf/src/` path and store the source code. - -``` - ├──contrib - │ └── udf - │ ├── CMakeLists.txt - │ └── src - │ └── udf_samples - │ ├── CMakeLists.txt - │ ├── uda_sample.cpp - │ ├── uda_sample.h - │ ├── udf_sample.cpp - │ └── udf_sample.h - -``` - -1. CMakeLists.txt - - After the user's `CMakeLists.txt` is placed here, a small amount of changes are required. Just remove `include udf` and `udf lib`. The reason for the removal is that it has been declared in the CMake file at the `contrib/udf` level. - -### manual - -The user manual needs to include: UDF function definition description, applicable scenarios, function syntax, how to compile UDF, how to use UDF in Doris, and use examples. - -1. The user manual must contain both Chinese and English versions and be stored under `docs/zh-CN/extending-doris/contrib/udf` and `docs/en/extending-doris/contrib/udf` respectively. - - ``` - ├── docs - │   └── zh-CN - │   └──extending-doris - │ └──udf - │ └──contrib - │ ├── udf-simple-manual.md - - ``` - - ``` - ├── docs - │   └── en - │   └──extending-doris - │ └──udf - │ └──contrib - │ ├── udf-simple-manual.md - ``` - -2. Add the two manual files to the sidebar in Chinese and English. - - ``` - vi docs/.vuepress/sidebar/zh-CN.js - { - title: "用户贡献的 UDF", - directoryPath: "contrib/", - children: - [ - "udf-simple-manual", - ], - }, - ``` - - ``` - vi docs/.vuepress/sidebar/en.js - { - title: "Users contribute UDF", - directoryPath: "contrib/", - children: - [ - "udf-simple-manual", - ], - }, - - ``` - -## Contribute UDF to the community - -When you meet the conditions and prepare the code, you can contribute UDF to the Doris community after the document. Simply submit the request (PR) on [Github](https://github.com/apache/incubator-doris). See the specific submission method: [Pull Request (PR)](https://help.github.com/articles/about-pull-requests/). - -Finally, when the PR assessment is passed and merged. Congratulations, your UDF becomes a third-party UDF supported by Doris. You can check it out in the extended functions section of [Doris official website](http://doris.apache.org/master/zh-CN/)~. diff --git a/docs/en/extending-doris/udf/java-user-defined-function.md b/docs/en/extending-doris/udf/java-user-defined-function.md deleted file mode 100644 index efbd293f78..0000000000 --- a/docs/en/extending-doris/udf/java-user-defined-function.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -{ - "title": "[Experimental] Java UDF", - "language": "en" -} ---- - - - -# Java UDF - -Java UDF provides users with a Java interface written in UDF to facilitate the execution of user-defined functions in Java language. Compared with native UDF implementation, Java UDF has the following advantages and limitations: -1. The advantages - * Compatibility: Using Java UDF can be compatible with different Doris versions, so when upgrading Doris version, Java UDF does not need additional migration. At the same time, Java UDF also follows the same programming specifications as hive / spark and other engines, so that users can directly move Hive / Spark UDF jar to Doris. - * Security: The failure or crash of Java UDF execution will only cause the JVM to report an error, not the Doris process to crash. - * Flexibility: In Java UDF, users can package the third-party dependencies together in the user jar. - -2. 
Restrictions on use
   * Performance: Compared with native UDFs, Java UDFs incur additional JNI overhead, but batch execution keeps this overhead as small as possible.
   * Vectorized engine: Java UDFs are currently only supported on the vectorized engine.

## Write UDF functions

This section mainly introduces how to develop a Java UDF. Samples for the Java version are provided under `samples/doris-demo/java-udf-demo/` for your reference.

To use a Java UDF, the main entry point of the UDF must be the `evaluate` function. This is consistent with other engines such as Hive. In the `AddOne` example, we implement a UDF that adds one to an integer.

It is worth mentioning that this example is not only a Java UDF supported by Doris but also a UDF supported by Hive; that is to say, Hive UDFs can be migrated to Doris directly.

## Create UDF

Currently, UDAF and UDTF are not supported.

```sql
CREATE FUNCTION 
name ([,...])
[RETURNS] rettype
PROPERTIES (["key"="value"][,...])
```

Instructions:

1. `symbol` in PROPERTIES is the name of the class that contains the UDF. This parameter must be set.
2. `file` in PROPERTIES is the jar package that contains the UDF. This parameter must be set.
3. `type` in PROPERTIES is the UDF invocation type, which is native by default. When using a Java UDF, set it to `JAVA_UDF`.
4. `name`: A function belongs to a DB, and its name is of the form `dbName`.`funcName`. When `dbName` is not explicitly specified, the db of the current session is used as `dbName`.

Sample:
```sql
CREATE FUNCTION java_udf_add_one(int) RETURNS int PROPERTIES (
    "file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar",
    "symbol"="org.apache.doris.udf.AddOne",
    "type"="JAVA_UDF"
);
```

## Use UDF

Users must have the `SELECT` privilege on the corresponding database to use a UDF/UDAF.

UDFs are used in the same way as ordinary functions. The only difference is that built-in functions have a global scope, while the scope of a UDF is the DB it belongs to. When the session is connected to that database, using the UDF name directly will find the UDF inside the current DB. Otherwise, the user needs to explicitly specify the UDF's database name, such as `dbName`.`funcName`.

## Delete UDF

When you no longer need a UDF, you can delete it with the corresponding command; refer to `DROP FUNCTION`.

## Example
Examples of Java UDFs are provided in the `samples/doris-demo/java-udf-demo/` directory. See the `README.md` in each directory for details on how to use them.

## Unsupported Use Case
At present, Java UDF is still under continuous development, so some features are **not yet complete**.
1. Complex data types (date, HLL, bitmap) are not supported.
2. Memory management and statistics of the JVM and Doris have not been unified.

diff --git a/docs/en/extending-doris/udf/native-user-defined-function.md b/docs/en/extending-doris/udf/native-user-defined-function.md
deleted file mode 100644
index c32f17549c..0000000000
--- a/docs/en/extending-doris/udf/native-user-defined-function.md
+++ /dev/null
@@ -1,264 +0,0 @@
---
{
    "title": "Native User Defined Function",
    "language": "en"
}
---

# Native User Defined Function
UDF is mainly suitable for scenarios where Doris does not natively provide the analytical capabilities that users need.
Users can implement custom functions according to their own needs, and register with Doris through the UDF framework to expand Doris' capabilities and solve user analysis needs. - -There are two types of analysis requirements that UDF can meet: UDF and UDAF. UDF in this article refers to both. - -1. UDF: User-defined function, this function will operate on a single line and output a single line result. When users use UDFs for queries, each row of data will eventually appear in the result set. Typical UDFs are string operations such as concat(). -2. UDAF: User-defined aggregation function. This function operates on multiple lines and outputs a single line of results. When the user uses UDAF in the query, each group of data after grouping will finally calculate a value and expand the result set. A typical UDAF is the set operation sum(). Generally speaking, UDAF will be used together with group by. - -This document mainly describes how to write a custom UDF function and how to use it in Doris. - -## Writing UDF functions - -Before using UDF, users need to write their own UDF functions under Doris' UDF framework. In the `contrib/udf/src/udf_samples/udf_sample.h|cpp` file is a simple UDF Demo. - -Writing a UDF function requires the following steps. - -### Writing functions - -Create the corresponding header file and CPP file, and implement the logic you need in the CPP file. Correspondence between the implementation function format and UDF in the CPP file. - -Users can put their own source code in a folder. Taking udf_sample as an example, the directory structure is as follows: - -``` -└── udf_samples - ├── uda_sample.cpp - ├── uda_sample.h - ├── udf_sample.cpp - └── udf_sample.h -``` - -#### Non-variable parameters - -For UDFs with non-variable parameters, the correspondence between the two is straightforward. -For example, the UDF of `INT MyADD(INT, INT)` will correspond to `IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2)`. - -1. `AddUdf` can be any name, as long as it is specified when creating UDF. -2. The first parameter in the implementation function is always `FunctionContext*`. The implementer can obtain some query-related content through this structure, and apply for some memory to be used. The specific interface used can refer to the definition in `udf/udf.h`. -3. In the implementation function, the second parameter needs to correspond to the UDF parameter one by one, for example, `IntVal` corresponds to `INT` type. All types in this part must be referenced with `const`. -4. The return parameter must correspond to the type of UDF parameter. - -#### variable parameter - -For variable parameters, you can refer to the following example, corresponding to UDF`String md5sum(String, ...)` -The implementation function is `StringVal md5sumUdf(FunctionContext* ctx, int num_args, const StringVal* args)` - -1. `md5sumUdf` can also be changed arbitrarily, just specify it when creating. -2. The first parameter is the same as the non-variable parameter function, and the passed in is a `FunctionContext*`. -3. The variable parameter part consists of two parts. First, an integer is passed in, indicating that there are several parameters behind. An array of variable parameter parts is passed in later. 
- -#### Type correspondence - -|UDF Type|Argument Type| -|----|---------| -|TinyInt|TinyIntVal| -|SmallInt|SmallIntVal| -|Int|IntVal| -|BigInt|BigIntVal| -|LargeInt|LargeIntVal| -|Float|FloatVal| -|Double|DoubleVal| -|Date|DateTimeVal| -|Datetime|DateTimeVal| -|Char|StringVal| -|Varchar|StringVal| -|Decimal|DecimalVal| - - -## Compile UDF function - -Since the UDF implementation relies on Doris' UDF framework, the first step in compiling UDF functions is to compile Doris, that is, the UDF framework. - -After the compilation is completed, the static library file of the UDF framework will be generated. Then introduce the UDF framework dependency and compile the UDF. - -### Compile Doris - -Running `sh build.sh` in the root directory of Doris will generate a static library file of the UDF framework `headers|libs` in `output/udf/` - -``` -├── output -│ └── udf -│ ├── include -│ │ ├── uda_test_harness.h -│ │ └── udf.h -│ └── lib -│ └── libDorisUdf.a - -``` - -### Writing UDF compilation files - -1. Prepare thirdparty - - The thirdparty folder is mainly used to store thirdparty libraries that users' UDF functions depend on, including header files and static libraries. It must contain the two files `udf.h` and `libDorisUdf.a` in the dependent Doris UDF framework. - - Taking udf_sample as an example here, the source code is stored in the user's own `udf_samples` directory. Create a thirdparty folder in the same directory to store the static library. The directory structure is as follows: - - ``` - ├── thirdparty - │ │── include - │ │ └── udf.h - │ └── lib - │ └── libDorisUdf.a - └── udf_samples - - ``` - - `udf.h` is the UDF frame header file. The storage path is `doris/output/udf/include/udf.h`. Users need to copy the header file in the Doris compilation output to their include folder of `thirdparty`. - - `libDorisUdf.a` is a static library of UDF framework. After Doris is compiled, the file is stored in `doris/output/udf/lib/libDorisUdf.a`. The user needs to copy the file to the lib folder of his `thirdparty`. - - *Note: The static library of UDF framework will not be generated until Doris is compiled. - -2. Prepare to compile UDF's CMakeFiles.txt - - CMakeFiles.txt is used to declare how UDF functions are compiled. Stored in the source code folder, level with user code. Here, taking udf_samples as an example, the directory structure is as follows: - - ``` - ├── thirdparty - └── udf_samples - ├── CMakeLists.txt - ├── uda_sample.cpp - ├── uda_sample.h - ├── udf_sample.cpp - └── udf_sample.h - ``` - - + Need to show declaration reference `libDorisUdf.a` - + Declare `udf.h` header file location - - - Take udf_sample as an example - - ``` - # Include udf - include_directories(thirdparty/include) - - # Set all libraries - add_library(udf STATIC IMPORTED) - set_target_properties(udf PROPERTIES IMPORTED_LOCATION thirdparty/lib/libDorisUdf.a) - - # where to put generated libraries - set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") - - # where to put generated binaries - set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") - - add_library(udfsample SHARED udf_sample.cpp) - target_link_libraries(udfsample - udf - -static-libstdc++ - -static-libgcc - ) - - add_library(udasample SHARED uda_sample.cpp) - target_link_libraries(udasample - udf - -static-libstdc++ - -static-libgcc - ) - ``` - - If the user's UDF function also depends on other thirdparty libraries, you need to declare include, lib, and add dependencies in `add_library`. 
- -The complete directory structure after all files are prepared is as follows: - -``` - ├── thirdparty - │ │── include - │ │ └── udf.h - │ └── lib - │ └── libDorisUdf.a - └── udf_samples - ├── CMakeLists.txt - ├── uda_sample.cpp - ├── uda_sample.h - ├── udf_sample.cpp - └── udf_sample.h -``` - -Prepare the above files and you can compile UDF directly - -### Execute compilation - -Create a build folder under the udf_samples folder to store the compilation output. - -Run the command `cmake ../` in the build folder to generate a Makefile, and execute make to generate the corresponding dynamic library. - -``` -├── thirdparty -├── udf_samples - └── build -``` - -### Compilation result - -After the compilation is completed, the UDF dynamic link library is successfully generated. Under `build/src/`, taking udf_samples as an example, the directory structure is as follows: - -``` -├── thirdparty -├── udf_samples - └── build - └── src - └── udf_samples - ├── libudasample.so -   └── libudfsample.so - -``` - -## Create UDF function - -After following the above steps, you can get the UDF dynamic library (that is, the `.so` file in the compilation result). You need to put this dynamic library in a location that can be accessed through the HTTP protocol. - -Then log in to the Doris system and create a UDF function in the mysql-client through the `CREATE FUNCTION` syntax. You need to have ADMIN authority to complete this operation. At this time, there will be a UDF created in the Doris system. - -``` -CREATE [AGGREGATE] FUNCTION -name ([argtype][,...]) -[RETURNS] rettype -PROPERTIES (["key"="value"][,...]) -``` -Description: - -1. "Symbol" in PROPERTIES means that the symbol corresponding to the entry function is executed. This parameter must be set. You can get the corresponding symbol through the `nm` command, for example, `_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_` obtained by `nm libudfsample.so | grep AddUdf` is the corresponding symbol. -2. The object_file in PROPERTIES indicates where it can be downloaded to the corresponding dynamic library. This parameter must be set. -3. name: A function belongs to a certain DB, and the name is in the form of `dbName`.`funcName`. When `dbName` is not explicitly specified, the db where the current session is located is used as `dbName`. - -For specific use, please refer to `CREATE FUNCTION` for more detailed information. - -## Use UDF - -Users must have the `SELECT` permission of the corresponding database to use UDF/UDAF. - -The use of UDF is consistent with ordinary function methods. The only difference is that the scope of built-in functions is global, and the scope of UDF is internal to DB. When the link session is inside the data, directly using the UDF name will find the corresponding UDF inside the current DB. Otherwise, the user needs to display the specified UDF database name, such as `dbName`.`funcName`. - - -## Delete UDF - -When you no longer need UDF functions, you can delete a UDF function by the following command, you can refer to `DROP FUNCTION`. 
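To tie the steps above together, here is a minimal, hedged sketch for the `AddUdf` sample. The symbol string is the one obtained from the `nm` command shown above; the HTTP address, SQL function name, table, and column names are illustrative assumptions rather than fixed values.

```sql
-- Register the sample AddUdf implementation after publishing libudfsample.so
-- at an HTTP-accessible address (the URL below is a placeholder).
CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES (
    "symbol" = "_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_",
    "object_file" = "http://example.host:8080/libudfsample.so"
);

-- The UDF is scoped to the current database and is called like a built-in function.
SELECT my_add(k1, k2) FROM example_tbl;

-- Drop it when it is no longer needed.
DROP FUNCTION my_add(INT, INT);
```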
diff --git a/docs/en/extending-doris/udf/remote-user-defined-function.md b/docs/en/extending-doris/udf/remote-user-defined-function.md deleted file mode 100644 index aa8cc3a3c9..0000000000 --- a/docs/en/extending-doris/udf/remote-user-defined-function.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -{ - "title": "Remote User Defined Function Service", - "language": "en" -} ---- - - - -# User Defined Function Rpc - -The Remote UDF Service can be accessed through RPC to execute user-defined functions. Compared with native UDF implementations, the Remote UDF Service has the following advantages and limitations: -1. Advantages - * Cross-language: UDF services can be written in all languages supported by Protobuf. - * Security: a UDF execution failure or crash only affects the UDF Service itself and does not cause the Doris process to crash. - * Flexibility: any other service or library can be invoked within a UDF Service to meet a wider variety of business requirements. - -2. Restrictions - * Performance: compared to native UDFs, UDF services incur extra network overhead and therefore have much lower performance. The implementation of the UDF Service also affects the execution efficiency of the function; users need to handle issues such as high concurrency and thread safety themselves. - * Single-row mode and batch mode: Doris's original row-based query execution framework executes one UDF RPC call per row of data, so the execution efficiency is very poor. Under the new vectorized execution framework, one UDF RPC call is executed per batch of data (2048 rows by default), so the performance is significantly better. In actual tests, the performance of Remote UDF based on vectorization and batch processing is close to that of native UDF based on row storage, which can be used as a reference. - -## Write UDF functions - -This section describes how to develop a Remote RPC Service. Samples for the Java version are provided under `samples/doris-demo/udf-demo/` for your reference. - -### Copy the proto files - -Copy `gensrc/proto/function_service.proto` and `gensrc/proto/types.proto` into the RPC service. - -- function_service.proto - - PFunctionCallRequest - - function_name: the function name, corresponding to the symbol specified when the function was created - - args: the arguments passed to the method - - context: query context information - - PFunctionCallResponse - - result: the returned result - - status: the returned status, 0 indicates normal - - PCheckFunctionRequest - - function: function-related information - - match_type: matching type - - PCheckFunctionResponse - - status: the returned status, 0 indicates normal - -### Generate the interface - -Use protoc to generate the code; run `protoc -h` to see the specific parameters. - -### Implement the interface - -The following three methods need to be implemented: -- fnCall: implements the computation logic -- checkFn: verifies the function name, parameters, and return value when the UDF is created -- handShake: used for interface probing - -## Create UDF - -Currently, UDAF and UDTF are not supported. - -```sql -CREATE FUNCTION -name ([argtype][,...]) -[RETURNS] rettype -PROPERTIES (["key"="value"][,...]) -``` -Instructions: - -1. `symbol` in PROPERTIES represents the name of the method invoked by the RPC call; this parameter must be set. -2. `object_file` in PROPERTIES represents the RPC service address. Currently, a single address and a cluster address in BRPC-compatible format are supported.
For the cluster connection mode, refer to the [format specification](https://github.com/apache/incubator-brpc/blob/master/docs/cn/client.md#%E8%BF%9E%E6%8E%A5%E6%9C%8D%E5%8A%A1%E9%9B%86%E7%BE%A4). -3. `type` in PROPERTIES indicates the UDF call type; it is Native by default, and must be set to RPC when using an RPC UDF. -4. name: A function belongs to a DB and its name is of the form `dbName`.`funcName`. When `dbName` is not explicitly specified, the db of the current session is used as `dbName`. - -Sample: -```sql -CREATE FUNCTION rpc_add(INT, INT) RETURNS INT PROPERTIES ( - "SYMBOL"="add_int", - "OBJECT_FILE"="127.0.0.1:9090", - "TYPE"="RPC" -); -``` - -## Use UDF - -Users must have the `SELECT` privilege on the corresponding database to use UDF/UDAF. - -Using a UDF is the same as using an ordinary function. The only difference is that the scope of built-in functions is global, while the scope of a UDF is within its DB. When the session is connected to a database, simply using the UDF name will look up the corresponding UDF in the current DB. Otherwise, the user needs to explicitly specify the UDF's database, such as `dbName`.`funcName`. - -## Delete UDF - -When you no longer need a UDF, you can delete it with the `DROP FUNCTION` command; refer to `DROP FUNCTION` for details. - -## Example -Examples of RPC server implementations in C++/Java/Python are provided in the `samples/doris-demo/` directory. See the `README.md` in each directory for details on how to use them. \ No newline at end of file diff --git a/new-docs/en/faq/data-faq.md b/docs/en/faq/data-faq.md similarity index 100% rename from new-docs/en/faq/data-faq.md rename to docs/en/faq/data-faq.md diff --git a/docs/en/faq/error.md b/docs/en/faq/error.md deleted file mode 100644 index df0905dcd2..0000000000 --- a/docs/en/faq/error.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -{ - "title": "Common Error", - "language": "en" -} ---- - - - -# Common Error - -This document is mainly used to record errors encountered while using Doris. If you run into errors that are not covered here, you are welcome to contribute updates. - - -### E1. Query error: Failed to get scan range, no queryable replica found in tablet: xxxx - -This happens because no queryable replica can be found for the corresponding tablet, usually because a BE is down, a replica is missing, and so on. You can first use the `show tablet tablet_id` statement, and then execute the `show proc` statement given in its result, to view the replica information of this tablet and check whether the replicas are complete. At the same time, you can use the `show proc "/cluster_balance"` information to query the progress of replica scheduling and repair in the cluster. - -For commands related to data replica management, please refer to [Data Replica Management](../administrator-guide/operation/tablet-repair-and-balance.md). - -### E2. FE failed to start, fe.log keeps scrolling "wait catalog to be ready. FE type UNKNOWN" - -There are usually two reasons for this problem: - -1. The local IP obtained when the FE is started this time is inconsistent with the last time, usually because `priority_network` is not set correctly and the wrong IP address is matched when the FE starts. You need to modify `priority_network` and restart the FE. - -2. Most of the Follower FE nodes in the cluster are not started. For example, there are 3 Followers and only one is started. In this case, at least one more FE needs to be started so that the FE electable group can elect a Master to provide services.
- -If none of the above conditions can be resolved, you can follow the [Metadata Operation and Maintenance Document] (../administrator-guide/operation/metadata-operation.md) in the Doris official website to restore. - -### E3. tablet writer write failed, tablet_id=27306172, txn_id=28573520, err=-235 or -215 or -238 - -This error usually occurs during data import operations. The error code of the new version is -235, and the error code of the old version may be -215. The meaning of this error is that the data version of the corresponding tablet exceeds the maximum limit (default 500, controlled by the BE parameter `max_tablet_version_num`), and subsequent writes will be rejected. For example, the error in the question means that the data version of the tablet 27306172 exceeds the limit. - -This error is usually because the import frequency is too high, which is greater than the compaction speed of the background data, which causes the version to accumulate and eventually exceeds the limit. At this point, we can first use the show tablet 27306172 statement, and then execute the show proc statement in the result to view the status of each copy of the tablet. The versionCount in the result represents the number of versions. If you find that there are too many versions of a copy, you need to reduce the import frequency or stop importing, and observe whether the number of versions drops. If the version number still does not decrease after the import is stopped, you need to go to the corresponding BE node to check the be.INFO log, search for the tablet id and compaction keywords, and check whether the compaction is running normally. For compaction tuning related, you can refer to the ApacheDoris public account article: Doris Best Practice-Compaction Tuning (3) - -The -238 error usually occurs when the amount of imported data in the same batch is too large, which leads to too many Segment files for a certain tablet (the default is 200, which is controlled by the BE parameter `max_segment_num_per_rowset`). At this time, it is recommended to reduce the amount of data imported in one batch, or to appropriately increase the value of the BE configuration parameter to solve the problem. - -### E4. tablet 110309738 has few replicas: 1, alive backends: [10003] - -This error may occur during query or import operation. It usually means that the copy of the tablet is abnormal. - -At this point, you can first check whether the BE node is down by using the show backends command, such as the isAlive field is false, or LastStartTime is the most recent time (indicating that it has been restarted recently). If the BE is down, you need to go to the node corresponding to the BE and check the be.out log. If the BE is down due to an exception, usually the exception stack will be printed in be.out to help troubleshoot the problem. If there is no error stack in be.out. You can use the linux command dmesg -T to check whether the process is killed by the system because of OOM. - -If no BE node is down, you need to use the show tablet 110309738 statement, and then execute the show proc statement in the result to check the status of each copy of the tablet for further investigation. - -### E5. disk xxxxx on backend xxx exceed limit usage - -It usually appears in operations such as import and Alter. This error means that the usage of the corresponding disk corresponding to the BE exceeds the threshold (95% by default). 
At this time, you can use the show backends command first, where MaxDiskUsedPct shows the usage of the disk with the highest usage on the corresponding BE. If If it exceeds 95%, this error will be reported. - -At this time, you need to go to the corresponding BE node to check the usage in the data directory. The trash directory and snapshot directory can be manually cleaned up to free up space. If the data directory occupies a lot, you need to consider deleting some data to free up space. For details, please refer to [Disk Space Management](../administrator-guide/operation/disk-capacity.md). - -### E6. invalid cluster id: xxxx - -This error may appear in the results of the show backends or show frontends commands. It usually appears in the error message column of a certain FE or BE node. The meaning of this error is that after Master FE sends heartbeat information to this node, the node finds that the cluster id carried in the heartbeat information is different from the cluster id stored locally, so it refuses to respond to the heartbeat. - -Doris' Master FE node will actively send a heartbeat to each FE or BE node, and will carry a cluster_id in the heartbeat information. The cluster_id is the unique cluster ID generated by the Master FE when a cluster is initialized. When the FE or BE receives the heartbeat information for the first time, it will save the cluster_id locally in the form of a file. The FE file is in the image/ directory of the metadata directory, and BE has a cluster_id file in all data directories. After that, every time a node receives a heartbeat, it will compare the content of the local cluster_id with the content in the heartbeat. If it is inconsistent, it will refuse to respond to the heartbeat. - -This mechanism is a node authentication mechanism to prevent receiving wrong heartbeat information from nodes outside the cluster. - -If you need to recover from this error. First, confirm whether all nodes are the correct nodes in the cluster. After that, for the FE node, you can try to modify the cluster_id value in the image/VERSION file in the metadata directory and restart the FE. For BE nodes, you can delete cluster_id files in all data directories and restart BE. - -### E7. Import data by calling stream load through a Java program. When a batch of data is large, a Broken Pipe error may be reported - -In addition to Broken Pipe, there may be other strange errors. - -This situation usually occurs after opening httpv2. Because httpv2 is an http service implemented using spring boot, and uses tomcat as the default built-in container. But tomcat's handling of 307 forwarding seems to have some problems, so the built-in container will be modified to jetty later. In addition, the version of apache http client in the java program needs to use a version later than 4.5.13. In the previous version, there were also some problems with the processing of forwarding. - -So this problem can be solved in two ways: - -1. Turn off httpv2 - - Add enable_http_server_v2=false in fe.conf and restart FE. However, the new UI interface can no longer be used in this way, and some new interfaces based on httpv2 cannot be used later. (Normal import queries are not affected). - -2. Upgrade - - You can upgrade to Doris 0.15 and later versions, this problem has been fixed. - - -### E8. 
`Lost connection to MySQL server at'reading initial communication packet', system error: 0` - -If the following problems occur when using the MySQL client to connect to Doris, this is usually caused by the difference between the jdk version used when compiling FE and the jdk version used when running FE. -Note that when using docker image to compile, the default JDK version is openjdk 11, you can switch to openjdk 8 by command (see the compilation document for details). - -### E9. -214 error - -When performing operations such as load and query, you may encounter the following errors: - -``` -failed to initialize storage reader. tablet=63416.1050661139.aa4d304e7a7aff9c-f0fa7579928c85a0, res=-214, backend=192.168.100.10 -``` - -A -214 error means that the data version of the corresponding tablet is missing. For example, the above error indicates that the data version of the replica of tablet 63416 on the BE of 192.168.100.10 is missing. (There may be other similar error codes, which can be checked and repaired in the following ways). - -Normally, if your data has multiple replicas, the system will automatically repair these problematic replicas. You can troubleshoot through the following steps: - -First, use the `show tablet 63416` statement and execute the `show proc xxx` statement in the result to view the status of each replica of the corresponding tablet. Usually we need to care about the data in the `Version` column. - -Under normal circumstances, the Version of multiple replicas of a tablet should be the same. And it is the same as the VisibleVersion of the corresponding partition. - -You can use `show partitions from tblx` to view the corresponding partition version (the partition corresponding to the tablet can be obtained in the `show tablet` statement.) - -At the same time, you can also visit the URL in the CompactionStatus column of the `show proc` statement (just open it in the browser) to view more specific version information, to check which version is missing. - -If there is no automatic repair for a long time, you need to use the `show proc "/cluster_balance"` statement to view the tablet repair and scheduling tasks currently being performed by the system. It may be because there are a large number of tablets waiting to be scheduled, which leads to a long repair time. You can follow the records in `pending_tablets` and `running_tablets`. - -Furthermore, you can use the `admin repair` statement to specify the priority to repair a table or partition. For details, please refer to `help admin repair`; - -If it still cannot be repaired, then in the case of multiple replicas, we use the `admin set replica status` command to force the replica to go offline. For details, please refer to the example of `help admin set replica status` to set the status of the replica to bad. (After set to bad, the replica will not be accessed again. And will be automatically repaired later. But before the operation, you should make sure that the other replicas are normal) - -### E10. Not connected to 192.168.100.1:8060 yet, server_id=384 - -We may encounter this error when loading or querying. If you go to the corresponding BE log to check, you may also find similar errors. - -This is an RPC error, and there are usually two possibilities: 1. The corresponding BE node is down. 2. rpc congestion or other errors. - -If the BE node is down, you need to check the specific reason for the downtime. Only the problem of rpc congestion is discussed here. 
- -One situation is OVERCROWDED, which means that a large amount of unsent data at the rpc client exceeds the threshold. BE has two parameters related to it: - -1. `brpc_socket_max_unwritten_bytes`: The default is 1GB. If the unwritten data exceeds this value, an error will be reported. You can modify this value appropriately to avoid OVERCROWDED errors. (But this cures the symptoms rather than the root cause, essentially congestion still occurs). -2. `tablet_writer_ignore_eovercrowded`: The default is false. If set to true, Doris will ignore OVERCROWDED errors during the load process. This parameter is mainly used to avoid load failure and improve the stability of load. - -The second is that the packet size of rpc exceeds `max_body_size`. This problem may occur if the query contains a very large String type or a Bitmap type. It can be circumvented by modifying the following BE parameters: - -1. `brpc_max_body_size`: The default is 3GB. - -### E11. `recoveryTracker should overlap or follow on disk last VLSN of 4,422,880 recoveryFirst= 4,422,882 UNEXPECTED_STATE_FATAL` - -Sometimes when restarting the Fe, the above error will occur (usually only in the case of multiple followers), and the difference between the two values in the error is 2. As a result, the Fe startup fails. - -This is a bug in bdbje that has not been resolved. In this case, metadata can only be recovered through fault recovery in [metadata operation and maintenance manual](../administrator-guide/operation/metadata-operation.md). - -### E12.Doris compile and install JDK version incompatibility problem - -When I use Docker to compile Doris myself, start FE after compiling and installing, ```java.lang.Suchmethoderror: java.nio.ByteBuffer.limit (I)Ljava/nio/ByteBuffer; ``` exception information, this is because the default in Docker is JDK 11. If your installation environment is using JDK8, you need to switch the JDK environment to JDK8 in Docker. For the specific switching method, refer to [Compilation](https://doris.apache.org/installing/compilation.html) diff --git a/docs/en/faq/faq.md b/docs/en/faq/faq.md deleted file mode 100644 index 27d14bc910..0000000000 --- a/docs/en/faq/faq.md +++ /dev/null @@ -1,297 +0,0 @@ ---- -{ - "title": "FAQ", - "language": "en" -} ---- - - - -# FAQ - -This document is mainly used to record common problems in the use of Doris. Will be updated from time to time. - -### Q1. Use Stream Load to access the public network address of FE to import data, and it is redirected to the internal network IP? - -When the connection target of stream load is the http port of FE, FE will only randomly select a BE node for http 307 redirect operation, so the user's request is actually sent to a BE designated by FE. The redirect returns the ip of BE, which is the intranet IP. So if you send the request through the public IP of FE, it is very likely that you will not be able to connect because you are redirected to the intranet address. - -The usual approach is to ensure that you can access the intranet IP address, or assume a load balance for all BE upper layers, and then directly send the stream load request to the load balancer, and the load balancer transparently transmits the request to the BE node . - -### Q2. When the BE node is offline through DECOMMISSION, why is there always some tablet remaining? 
- -During the offline process, check the tabletNum of the node being decommissioned through show backends; you will observe that tabletNum keeps decreasing, indicating that data shards are being migrated off this node. When the number drops to 0, the system automatically deletes the node. But in some cases, tabletNum stops changing after dropping to a certain value. This is usually due to one of the following two reasons: - -1. The tablets belong to a table, partition, or materialized view that has just been deleted. Objects that have just been deleted remain in the recycle bin, and the decommission logic will not process these shards. You can change how long objects stay in the recycle bin by modifying the FE configuration parameter catalog_trash_expire_second. These tablets will be processed once the objects are removed from the recycle bin. - -2. There is a problem with the migration tasks of these tablets. In this case, you need to check the errors of the specific tasks through show proc "/cluster_balance". - -For the above situation, you can first check whether the cluster still has unhealthy shards through show proc "/statistic". If the number is 0, you can delete the BE directly through the drop backend statement. Otherwise, you need to check the replica status of the unhealthy shards. - - -### Q3. How should priority_network be set? - -priority_network is a configuration parameter for both FE and BE. It is mainly used to help the system choose the correct network interface IP as its own IP. It is recommended to set this parameter explicitly in all cases, to prevent incorrect IP selection if a new network card is added to the machine later. - -The value of priority_network is expressed in CIDR format. It has two parts: the first part is a dotted-decimal IP address, and the second part is a prefix length. For example, 10.168.1.0/8 will match all 10.xx.xx.xx IP addresses, and 10.168.1.0/16 will match all 10.168.xx.xx IP addresses. - -The reason for using the CIDR format instead of directly specifying a concrete IP is to ensure that all nodes can use a uniform configuration value. For example, if there are two nodes, 10.168.10.1 and 10.168.10.2, we can use 10.168.10.0/24 as the value of priority_network. - -### Q4. What are FE's Master, Follower and Observer? - -First of all, be clear that FE has only two roles: Follower and Observer. The Master is simply an FE elected from a group of Follower nodes; it can be regarded as a special kind of Follower. So when asked how many FEs a cluster has and what roles they are, the correct answer is the number of all FE nodes, the number of Follower roles, and the number of Observer roles. - -All FE nodes in the Follower role form an electable group, similar to the group concept in the Paxos consensus protocol. One Follower in the group is elected as the Master. When the Master goes down, a new Master is automatically elected from the remaining Followers. Observers do not participate in the election, so an Observer can never become Master. - -A metadata log write is considered successful only after it has been written to a majority of the Follower nodes. For example, with 3 Follower FEs, a write succeeds once 2 of them have written it. This is why the number of Follower roles needs to be odd. - -The role of the Observer matches the meaning of the word.
It only acts as an observer to synchronize the metadata logs that have been successfully written, and provides metadata reading services. He will not participate in the logic of majority writing. - -Normally, you can deploy 1 Follower + 2 Observer or 3 Follower + N Observer. The former is simple to operate and maintain, and there will be almost no consensus agreement between Followers to cause this complicated error situation (most of Baidu's internal clusters use this method). The latter can ensure the high availability of metadata writing. If it is a high-concurrency query scenario, you can appropriately increase the Observer. - -### Q5. Does Doris support modifying column names? - -Does not support modifying column names. - -Doris supports modifying database names, table names, partition names, materialized view (Rollup) names, and column types, comments, default values, and so on. But unfortunately, currently does not support modifying the column name. - -For some historical reasons, the column names are currently written directly into the data file. When Doris searches, he also finds the corresponding column by the class name. Therefore, modifying column names is not only a simple metadata modification, but also involves data rewriting, which is a very heavy operation. - -We do not rule out the subsequent use of some compatible means to support lightweight column name modification operations. - -### Q6. Does the table of the Unique Key model support the creation of materialized views? - -not support. - -The table of the Unique Key model is a business-friendly table. Because of its unique function of de-duplication according to the primary key, it can easily synchronize business databases with frequent data changes. Therefore, many users will first consider using the Unique Key model when accessing data to Doris. - -Unfortunately, the table of the Unique Key model cannot create a materialized view. The reason is that the nature of the materialized view is to "pre-calculate" the data through pre-calculation, so that the calculated data is directly returned during the query to speed up the query. In the materialized view, the "pre-calculated" data is usually some aggregated indicators, such as summation and count. At this time, if the data changes, such as udpate or delete, because the pre-calculated data has lost the detailed information, it cannot be updated synchronously. For example, a sum of 5 may be 1+4 or 2+3. Because of the loss of detailed information, we cannot distinguish how the sum is calculated, and therefore cannot meet the update requirements. - -### Q7. show backends/frontends Viewed information is incomplete - -After executing certain statements such as `show backends/frontends`, some columns may be incomplete in the results. For example, the disk capacity information cannot be seen in the show backends results. - -This problem usually occurs when there are multiple FEs in the cluster. If users connect to non-Master FE nodes to execute these statements, they will see incomplete information. This is because part of the information only exists in the Master FE node. Such as BE's disk usage information. Therefore, the complete information can be obtained only after the Master FE is directly connected. - -Of course, the user can also execute `set forward_to_master=true;` before executing these statements. After the session variable is set to true, some information viewing statements executed later will be automatically forwarded to the Master FE to obtain the results. 
In this way, no matter which FE the user is connected to, the complete result can be obtained. - -### Q8. A new disk is added to the node. Why is the data not balanced on the new disk? - -The current balance strategy of Doris is based on nodes. In other words, the cluster load is judged according to the overall load index of the node (the number of shards and the total disk utilization). And migrate data fragments from high-load nodes to low-load nodes. If each node adds a disk, from the perspective of the node as a whole, the load has not changed, so the balancing logic cannot be triggered. - -In addition, Doris currently does not support balanced operations within a single node and between various disks. Therefore, after adding a new disk, the data will not be balanced to the new disk. - -However, when data is migrated between nodes, Doris will consider the disk factor. For example, if a slice is migrated from node A to node B, the disk with lower disk space utilization among node B will be selected first. - -Here we provide 3 ways to solve this problem: - -1. Rebuild the new table - - Create a new table through the create table like statement, and then use insert into select to synchronize the data from the old table to the new table. Because when a new table is created, the data fragments of the new table will be distributed on the new disk, and the data will also be written to the new disk. This method is suitable for situations where the amount of data is small (within tens of GB). - -2. Through the Decommission command - - The decommission command is used to safely decommission a BE node. This command will first migrate the data fragments on the node to other nodes, and then delete the node. As mentioned earlier, when data is migrated, disks with low disk utilization will be given priority, so this method can "force" the data to be migrated to the disks of other nodes. When the data migration is completed, we cancel the decommission operation, so that the data will be rebalanced back to this node. When we perform the above steps for all BE nodes, the data will be evenly distributed on all disks of all nodes. - - Note that before executing the decommission command, execute the following command first to avoid the node being deleted after it is offline. - - `admin set frontend config("drop_backend_after_decommission" = "false");` - -3. Manually migrate data using API - - Doris provides [HTTP API](../administrator-guide/http-actions/tablet-migration-action.md), which allows you to manually specify data fragments on one disk to migrate to another disk. - -### Q9. How to read FE/BE log correctly? - -In many cases, we need to troubleshoot problems through logs. Here is an explanation of the format and viewing method of the FE/BE log. - -1. FE - - FE logs mainly include: - - * fe.log: main log. Including everything except fe.out. - * fe.warn.log: A subset of the main log, which only records WARN and ERROR level logs. - * fe.out: Standard/error output log (stdout and stderr). - * fe.audit.log: Audit log, which records all SQL requests received by this FE. - - A typical FE log is as follows: - - ``` - 2021-09-16 23:13:22,502 INFO (tablet scheduler|43) [BeLoadRebalancer.selectAlternativeTabletsForCluster():85] cluster is balance: default_cluster with medium: HDD. skip - ``` - - * `2021-09-16 23:13:22,502`: log time. - * `INFO: log level, the default is INFO`. - * `(tablet scheduler|43)`: thread name and thread id. 
Through the thread id, you can view the thread context information and troubleshoot what happened in this thread. - * `BeLoadRebalancer.selectAlternativeTabletsForCluster():85`: class name, method name and code line number. - * `cluster is balance xxx`: log content. - - Normally, we mainly check the fe.log log. Under special circumstances, some logs may be output to fe.out. - -2. BE - - The BE logs mainly include: - - * be.INFO: Main log. This is actually a soft connection, connected to the latest be.INFO.xxxx. - * be.WARNING: A subset of the main log, only logs of WARN and FATAL levels are recorded. This is actually a soft connection, connected to the latest be.WARN.xxxx. - * be.out: standard/error output log (stdout and stderr). - - A typical BE log is as follows: - - ``` - I0916 23:21:22.038795 28087 task_worker_pool.cpp:1594] finish report TASK. master host: 10.10.10.10, port: 9222 - ``` - - * `I0916 23:21:22.038795`: Log level and date and time. The capital letter I means INFO, W means WARN, and F means FATAL. - * `28087`: thread id. Through the thread id, you can view the thread context information and troubleshoot what happened in this thread. - * `task_worker_pool.cpp:1594`: code file and line number. - * `finish report TASK xxx`: log content. - - Normally, we mainly check the be.INFO log. Under special circumstances, such as BE downtime, you need to check be.out. - -### Q10. How to troubleshoot the cause of FE/BE node down? - -1. BE - - The BE process is a C/C++ process, and the process may hang due to some program bugs (memory out of bounds, illegal address access, etc.) or Out Of Memory (OOM). At this point, we can check the cause of the error through the following steps: - - 1. View be.out - - The BE process realizes that when the program exits due to an abnormal condition, it will print the current error stack to be.out (note that it is be.out, not be.INFO or be.WARNING). Through the error stack, you can usually get a rough idea of ​​where the program went wrong. - - Note that if an error stack appears in be.out, it is usually due to a program bug, and ordinary users may not be able to solve it by themselves. Welcome to the WeChat group, github discussion or dev mail group for help, and post the corresponding error stack for quick Troubleshoot the problem. - - 2. dmesg - - If be.out has no stack information, it is likely that OOM was forcibly killed by the system. At this point, you can use the dmesg -T command to view the Linux system log. If a log similar to Memory cgroup out of memory: Kill process 7187 (palo_be) score 1007 or sacrifice child appears at the end, it means that it is caused by OOM. - - There may be many reasons for memory problems, such as large queries, imports, compactions, etc. Doris is also constantly optimizing memory usage. Welcome to the WeChat group, github discussion or dev mailing group for help. - - 3. Check whether there are logs starting with F in be.INFO. - - The log at the beginning of F is the Fatal log. For example, F0916 means the Fatal log on September 16. Fatal logs usually indicate program assertion errors, and assertion errors will directly cause the process to exit (indicating that the program has a bug). Welcome to the WeChat group, github discussion or dev mailing group for help. - - 4. Minidump - - Mindump is a feature added after Doris 0.15. For details, please refer to [Document](../developer-guide/minidump.md). - -2. FE - - FE is a java process, and its robustness depends on the C/C++ program. 
Usually, the cause of FE hanging may be OOM (Out-of-Memory) or metadata writing failure. These errors usually have an error stack in fe.log or fe.out. You need to investigate further based on the error stack information. - -### Q11. About the configuration of the data directory SSD and HDD. - -Doris supports a BE node to configure multiple storage paths. Normally, it is sufficient to configure one storage path for each disk. At the same time, Doris supports storage media attributes of specified paths, such as SSD or HDD. SSD stands for high-speed storage devices, and HDD stands for low-speed storage devices. - -By specifying the storage medium properties of the path, we can use Doris's hot and cold data partition storage function to store hot data in the SSD at the partition level, and the cold data will be automatically transferred to the HDD. - -It should be noted that Doris does not automatically perceive the actual storage medium type of the disk where the storage path is located. This type needs to be explicitly indicated by the user in the path configuration. For example, the path "/path/to/data1.SSD" means that this path is an SSD storage medium. And "data1.SSD" is the actual directory name. Doris determines the storage medium type based on the ".SSD" suffix behind the directory name, not the actual storage medium type. In other words, the user can specify any path as the SSD storage medium, and Doris only recognizes the directory suffix and will not judge whether the storage medium matches. If you do not write the suffix, the default is HDD. - -In other words, ".HDD" and ".SSD" are only used to identify the "relative" "low speed" and "high speed" of the storage directory, not the actual storage medium type. Therefore, if the storage path on the BE node has no difference in media, there is no need to fill in the suffix. - -### Q12. The query results of unique key model are inconsistent - -In some cases, when users use the same SQL to query a table of a unique key model, inconsistent query results may occur. And the query results always change between 2-3 kinds. - -This may be because there are data with the same key but different values in the same batch of imported data, which will lead to inconsistent results between different replicas due to uncertain data replace order. - -For example, tables are defined as k1 and v1. A batch of imported data is as follows: - -``` -1, "abc" -1, "def" -``` - -Then the result of replica 1 may be '1, "ABC', while the result of replica 2 may be '1," def'. This leads to inconsistent query results. - -To ensure the unique data order between different replicas, refer to the [Sequence Column](../administrator-guide/load-data/sequence-column-manual.md) function. - -### Q13. Multiple FEs cannot log in when using Nginx to implement web UI load balancing - -Doris can deploy multiple FEs. When accessing the Web UI, if you use Nginx for load balancing, you will be prompted to log in again because of Session problems. This problem is actually a session sharing problem. Nginx provides centralized session sharing. 
The solution, here we use the ip_hash technology in nginx, ip_hash can direct the request of a certain ip to the same backend, so that a certain client and a certain backend under this ip can establish a stable The session, ip_hash is defined in the upstream configuration: - -``` -upstream doris.com { - server 172.22.197.238:8030 weight=3; - server 172.22.197.239:8030 weight=4; - server 172.22.197.240:8030 weight=4; - ip_hash; -} -``` -The complete Nginx example configuration is as follows: - -``` -user nginx; -worker_processes auto; -error_log /var/log/nginx/error.log; -pid /run/nginx.pid; - -# Load dynamic modules. See /usr/share/doc/nginx/README.dynamic. -include /usr/share/nginx/modules/*.conf; - -events { - worker_connections 1024; -} - -http { - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent" "$http_x_forwarded_for"'; - - access_log /var/log/nginx/access.log main; - - sendfile on; - tcp_nopush on; - tcp_nodelay on; - keepalive_timeout 65; - types_hash_max_size 2048; - - include /etc/nginx/mime.types; - default_type application/octet-stream; - - # Load modular configuration files from the /etc/nginx/conf.d directory. - # See http://nginx.org/en/docs/ngx_core_module.html#include - # for more information. - include /etc/nginx/conf.d/*.conf; - #include /etc/nginx/custom/*.conf; - upstream doris.com { - server 172.22.197.238:8030 weight=3; - server 172.22.197.239:8030 weight=4; - server 172.22.197.240:8030 weight=4; - ip_hash; - } - - server { - listen 80; - server_name gaia-pro-bigdata-fe02; - if ($request_uri ~ _load) { - return 307 http://$host$request_uri ; - } - - location / { - proxy_pass http://doris.com; - proxy_redirect default; - } - error_page 500 502 503 504 /50x.html; - location = /50x.html { - root html; - } - } - } -``` diff --git a/new-docs/en/faq/install-faq.md b/docs/en/faq/install-faq.md similarity index 100% rename from new-docs/en/faq/install-faq.md rename to docs/en/faq/install-faq.md diff --git a/new-docs/en/faq/sql-faq.md b/docs/en/faq/sql-faq.md similarity index 100% rename from new-docs/en/faq/sql-faq.md rename to docs/en/faq/sql-faq.md diff --git a/new-docs/en/get-starting/get-starting.md b/docs/en/get-starting/get-starting.md similarity index 99% rename from new-docs/en/get-starting/get-starting.md rename to docs/en/get-starting/get-starting.md index a557f962d8..62e35871bb 100644 --- a/new-docs/en/get-starting/get-starting.md +++ b/docs/en/get-starting/get-starting.md @@ -1,6 +1,6 @@ --- { - "title": "Get-Starting", + "title": "Getting-Started", "language": "en" } @@ -25,7 +25,7 @@ specific language governing permissions and limitations under the License. --> -# Apache Doris Get-Starting +# Getting Started ## Environmental preparation diff --git a/docs/en/getting-started/advance-usage.md b/docs/en/getting-started/advance-usage.md deleted file mode 100644 index 3a47a2f7db..0000000000 --- a/docs/en/getting-started/advance-usage.md +++ /dev/null @@ -1,280 +0,0 @@ ---- -{ - "title": "Advanced Use Guide", - "language": "en" -} ---- - - - -# Advanced Use Guide - -Here we introduce some of Doris's advanced features. - -## Table 1 Structural Change - -Schema of the table can be modified using the ALTER TABLE command, including the following modifications: - -* Additional columns -* Delete columns -* Modify column type -* Changing column order - -Examples are given below. 
- -Schema of Table 1 is as follows: - -``` -+----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-------+---------+-------+ -| siteid | int(11) | No | true | 10 | | -| citycode | smallint(6) | No | true | N/A | | -| username | varchar(32) | No | true | | | -| pv | bigint(20) | No | false | 0 | SUM | -+----------+-------------+------+-------+---------+-------+ -``` - -We added a new column of uv, type BIGINT, aggregation type SUM, default value is 0: - -`ALTER TABLE table1 ADD COLUMN uv BIGINT SUM DEFAULT '0' after pv;` - -After successful submission, you can view the progress of the job by following commands: - -`SHOW ALTER TABLE COLUMN;` - -When the job state is FINISHED, the job is completed. The new Schema is in force. - -After ALTER TABLE is completed, you can view the latest Schema through `DESC TABLE`. - -``` -mysql> DESC table1; -+----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-------+---------+-------+ -| siteid | int(11) | No | true | 10 | | -| citycode | smallint(6) | No | true | N/A | | -| username | varchar(32) | No | true | | | -| pv | bigint(20) | No | false | 0 | SUM | -| uv | bigint(20) | No | false | 0 | SUM | -+----------+-------------+------+-------+---------+-------+ -5 rows in set (0.00 sec) -``` - -The following command can be used to cancel the job currently being executed: - -`CANCEL ALTER TABLE COLUMN FROM table1` - -For more help, see `HELP ALTER TABLE`. - -## 2 Rollup - -Rollup can be understood as a materialized index structure of Table. **materialized** because data is store as a concrete ("materialized") table independently, and **indexing** means that Rollup can adjust column order to increase the hit rate of prefix index, or reduce key column to increase data aggregation. - -Examples are given below. - -Schema of Table 1 is as follows: - -``` -+----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-------+---------+-------+ -| siteid | int(11) | No | true | 10 | | -| citycode | smallint(6) | No | true | N/A | | -| username | varchar(32) | No | true | | | -| pv | bigint(20) | No | false | 0 | SUM | -| uv | bigint(20) | No | false | 0 | SUM | -+----------+-------------+------+-------+---------+-------+ -``` - -For table1 detailed data, siteid, citycode and username form a set of keys, which aggregate the PV field. If the business side often has the need to see the total amount of PV in the city, it can build a rollup with only citycode and pv. - -`ALTER TABLE table1 ADD ROLLUP rollup_city(citycode, pv);` - -After successful submission, you can view the progress of the job by following commands: - -`SHOW ALTER TABLE ROLLUP;` - -When the job state is FINISHED, the job is completed. - -When Rollup is established, you can use `DESC table1 ALL` to view the Rollup information of the table. 
- -``` -mysql> desc table1 all; -+-------------+----------+-------------+------+-------+--------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+-------------+----------+-------------+------+-------+---------+-------+ -| table1 | siteid | int(11) | No | true | 10 | | -| | citycode | smallint(6) | No | true | N/A | | -| | username | varchar(32) | No | true | | | -| | pv | bigint(20) | No | false | 0 | SUM | -| | uv | bigint(20) | No | false | 0 | SUM | -| | | | | | | | -| rollup_city | citycode | smallint(6) | No | true | N/A | | -| | pv | bigint(20) | No | false | 0 | SUM | -+-------------+----------+-------------+------+-------+---------+-------+ -8 rows in set (0.01 sec) -``` - -The following command can be used to cancel the job currently being executed: - -`CANCEL ALTER TABLE ROLLUP FROM table1;` - -After Rollup is established, the query does not need to specify Rollup to query. Or specify the original table for query. The program automatically determines whether Rollup should be used. Whether Rollup is hit or not can be viewed by the `EXPLAIN your_sql;`. - -For more help, see `HELP ALTER TABLE`. - -## 2 Query of Data Table - -### 2.1 Memory Limitation - -To prevent a user's query from consuming too much memory. Queries are controlled in memory. A query task uses no more than 2GB of memory by default on a single BE node. - -When users use it, if they find a `Memory limit exceeded` error, they usually exceed the memory limit. - -Users should try to optimize their SQL statements when they encounter memory overrun. - -If it is found that 2GB memory cannot be satisfied, the memory parameters can be set manually. - -Display query memory limits: - -``` -mysql> SHOW VARIABLES LIKE "%mem_limit%"; -+---------------+------------+ -| Variable_name | Value | -+---------------+------------+ -| exec_mem_limit| 2147483648 | -+---------------+------------+ -1 row in set (0.00 sec) -``` - -The unit of `exec_mem_limit` is byte, and the value of `exec_mem_limit` can be changed by the `SET` command. If changed to 8GB. - -`SET exec_mem_limit = 8589934592;` - -``` -mysql> SHOW VARIABLES LIKE "%mem_limit%"; -+---------------+------------+ -| Variable_name | Value | -+---------------+------------+ -| exec_mem_limit| 8589934592 | -+---------------+------------+ -1 row in set (0.00 sec) -``` - -> * The above modification is session level and is only valid within the current connection session. Disconnecting and reconnecting will change back to the default value. -> * If you need to modify the global variable, you can set it as follows: `SET GLOBAL exec_mem_limit = 8589934592;` When the setup is complete, disconnect the session and log in again, and the parameters will take effect permanently. - -### 2.2 Query timeout - -The current default query time is set to 300 seconds. If a query is not completed within 300 seconds, the query will be cancelled by the Doris system. Users can use this parameter to customize the timeout time of their applications and achieve a blocking mode similar to wait (timeout). - -View the current timeout settings: - -``` -mysql> SHOW VARIABLES LIKE "%query_timeout%"; -+---------------+-------+ -| Variable_name | Value | -+---------------+-------+ -| QUERY_TIMEOUT | 300 | -+---------------+-------+ -1 row in set (0.00 sec) -``` - -Modify the timeout to 1 minute: - -`SET query timeout =60;` - -> * The current timeout check interval is 5 seconds, so timeouts less than 5 seconds are not very accurate. -> * The above modifications are also session level. 
Global validity can be modified by `SET GLOBAL`. - -### 2.3 Broadcast/Shuffle Join - -The system implements Join operator in two ways: - -Broadcast join: conditionally filtering right hand tables, broadcasting them to the nodes where the large tables are located, forming a memory Hash table, and then streaming out the data of the large tables Hash Join. - -Shuffle join: tables in both side are Hash according to Join's key, and then distributed Join. This memory consumption is allocated to all computing nodes in the cluster. - -Broadcast join is perfermance better when right hand table size is really small, vice versa. - -Doris will try to use Broadcast Join first. You can specify how each join operator is implemented explicitly. System provides configurable parameter `auto_broadcast_join_threshold` to configure the maximum percentage of execute memory could used for build hash table for broadcast join. The meaningful values range from `0` to `1`, and the default value is `0.8`. System will use shuffle join when broadcast join used memory more than it. - -You can turn off broadcast join by set `auto_broadcast_join_threshold` to negative or `0`. - -Choose join implementation automaticaly (default): - -``` -mysql> select sum(table1.pv) from table1 join table2 where table1.siteid = 2; -+--------------------+ -| sum(`table1`.`pv`) | -+--------------------+ -| 10 | -+--------------------+ -1 row in set (0.20 sec) -``` - -Use Broadcast Join (explicitly specified): - -``` -mysql> select sum(table1.pv) from table1 join [broadcast] table2 where table1.siteid = 2; -+--------------------+ -| sum(`table1`.`pv`) | -+--------------------+ -| 10 | -+--------------------+ -1 row in set (0.20 sec) -``` - -Shuffle Join: - -``` -mysql> select sum(table1.pv) from table1 join [shuffle] table2 where table1.siteid = 2; -+--------------------+ -| sum(`table1`.`pv`) | -+--------------------+ -| 10 | -+--------------------+ -1 row in set (0.15 sec) -``` - -### 2.4 Query Retry and High Availability - -When multiple FE nodes are deployed, users can deploy load balancing layers on top of multiple FEs to achieve high availability of Doris. - -Here are some highly available solutions: - -**The first** - -I retry and load balancing in application layer code. For example, if a connection is found to be dead, it will automatically retry on other connections. Application-level code retry requires the application to configure multiple Doris front-end node addresses. - -**Second** - -If you use MySQL JDBC connector to connect Doris, you can use jdbc's automatic retry mechanism: - -``` -jdbc:mysql://[host1][:port1],[host2][:port2][,[host3][:port3]]...[/[database]][?propertyName1=propertyValue1[&propertyName2=propertyValue2]...] -``` - -**The third** - -Applications can connect to and deploy MySQL Proxy on the same machine by configuring MySQL Proxy's Failover and Load Balance functions. - -`http://dev.mysql.com/doc/refman/5.6/en/mysql-proxy-using.html` \ No newline at end of file diff --git a/docs/en/getting-started/basic-usage.md b/docs/en/getting-started/basic-usage.md deleted file mode 100644 index fedd7cd009..0000000000 --- a/docs/en/getting-started/basic-usage.md +++ /dev/null @@ -1,382 +0,0 @@ ---- -{ - "title": "Guidelines for Basic Use", - "language": "en" -} ---- - - - - -# Guidelines for Basic Use - -Doris uses MySQL protocol to communicate. Users can connect to Doris cluster through MySQL client or MySQL JDBC. 
When selecting the MySQL client version, it is recommended to use the version after 5.1, because user names of more than 16 characters cannot be supported before 5.1. This paper takes MySQL client as an example to show users the basic usage of Doris through a complete process. - -## 1 Create Users - -### 1.1 Root User Logon and Password Modification - -Doris has built-in root and admin users, and the password is empty by default. After starting the Doris program, you can connect to the Doris cluster through root or admin users. -Use the following command to log in to Doris: - -``` -mysql -h FE_HOST -P9030 -uroot -``` - ->` fe_host` is the IP address of any FE node. ` 9030 ` is the query_port configuration in fe.conf. - -After login, you can modify the root password by following commands - -``` -SET PASSWORD FOR 'root' = PASSWORD('your_password'); -``` - -### 1.3 Creating New Users - -Create an ordinary user with the following command. - -``` -CREATE USER 'test' IDENTIFIED BY 'test_passwd'; -``` - -Follow-up login can be done through the following connection commands. - -``` -mysql -h FE_HOST -P9030 -utest -ptest_passwd -``` - -> By default, the newly created common user does not have any permissions. Permission grants can be referred to later permission grants. - -## 2 Data Table Creation and Data Import - -### 2.1 Create a database - -Initially, a database can be created through root or admin users: - -`CREATE DATABASE example_db;` - -> All commands can use `HELP` command to see detailed grammar help. For example: `HELP CREATE DATABASE;` - -> If you don't know the full name of the command, you can use "help command a field" for fuzzy query. If you type `HELP CREATE`, you can match commands like `CREATE DATABASE`, `CREATE TABLE`, `CREATE USER`, etc. - -After the database is created, you can view the database information through `SHOW DATABASES'. - -``` -MySQL> SHOW DATABASES; -+--------------------+ -| Database | -+--------------------+ -| example_db | -| information_schema | -+--------------------+ -2 rows in set (0.00 sec) -``` - -Information_schema exists to be compatible with MySQL protocol. In practice, information may not be very accurate. Therefore, information about specific databases is suggested to be obtained by directly querying the corresponding databases. - -### 2.2 Account Authorization - -After the example_db is created, the read and write permissions of example_db can be authorized to ordinary accounts, such as test, through the root/admin account. After authorization, the example_db database can be operated by logging in with the test account. - -`GRANT ALL ON example_db TO test;` - -### 2.3 Formulation - -Create a table using the `CREATE TABLE` command. More detailed parameters can be seen: - -`HELP CREATE TABLE;` - -First switch the database: - -`USE example_db;` - -Doris supports single partition and composite partition. - -In the composite partition: - -* The first level is called Partition, or partition. Users can specify a dimension column as a partition column (currently only integer and time type columns are supported), and specify the range of values for each partition. - -* The second stage is called Distribution, or bucket division. Users can specify one or more dimension columns and the number of buckets for HASH distribution of data. - -Composite partitioning is recommended for the following scenarios - -* There are time dimensions or similar dimensions with ordered values, which can be used as partition columns. 
The partition granularity can be chosen according to the import frequency and the amount of data per partition. -* Historical data deletion requirements: if historical data needs to be deleted (for example, only the last N days of data are retained), composite partitions let you achieve this by dropping historical partitions. Data can also be deleted by sending a DELETE statement within a specified partition. -* Solving the data skew problem: each partition can specify its own number of buckets. When partitioning by day and the daily data volume varies greatly, the data can be divided reasonably by specifying an appropriate number of buckets for each partition. Columns with a high degree of differentiation are recommended as bucket columns. - -Users can also choose not to use composite partitions and use a single partition instead; the data is then only distributed by HASH. - -Taking the aggregation model as an example, the following illustrates both kinds of partitioning. - -#### Single partition - -Create a logical table with the name table1. The number of buckets is 10. - -The schema of this table is as follows: - -* siteid: the type is INT (4 bytes), and the default value is 10. -* citycode: the type is SMALLINT (2 bytes). -* username: the type is VARCHAR, the maximum length is 32 bytes, and the default value is an empty string. -* pv: the type is BIGINT (8 bytes), and the default value is 0; this is a metric column, Doris aggregates metric columns internally, and the aggregation method of this column is SUM. - -The TABLE statement is as follows: -``` -CREATE TABLE table1 -( - siteid INT DEFAULT '10', - citycode SMALLINT, - username VARCHAR(32) DEFAULT '', - pv BIGINT SUM DEFAULT '0' -) -AGGREGATE KEY(siteid, citycode, username) -DISTRIBUTED BY HASH(siteid) BUCKETS 10 -PROPERTIES("replication_num" = "1"); -``` - -#### Composite partition - -Create a logical table named table2. - -The schema of this table is as follows: - -* event_day: the type is DATE, with no default value. -* siteid: the type is INT (4 bytes), and the default value is 10. -* citycode: the type is SMALLINT (2 bytes). -* username: the type is VARCHAR, the maximum length is 32 bytes, and the default value is an empty string. -* pv: the type is BIGINT (8 bytes), and the default value is 0; this is a metric column, Doris aggregates metric columns internally, and the aggregation method of this column is SUM. - -We use the event_day column as the partition column to create three partitions: p201706, p201707, and p201708. - -* p201706: Range [Minimum, 2017-07-01) -* p201707: Range [2017-07-01, 2017-08-01) -* p201708: Range [2017-08-01, 2017-09-01) - -> Note that the interval is left closed and right open.
- -Each partition uses siteid to hash buckets, with a bucket count of 10 - -The TABLE statement is as follows: -``` -CREATE TABLE table2 -( - event_day DATE, - siteid INT DEFAULT '10', - citycode SMALLINT, - username VARCHAR(32) DEFAULT '', - pv BIGINT SUM DEFAULT '0' -) -AGGREGATE KEY(event_day, siteid, citycode, username) -PARTITION BY RANGE(event_day) -( - PARTITION p201706 VALUES LESS THAN ('2017-07-01'), - PARTITION p201707 VALUES LESS THAN ('2017-08-01'), - PARTITION p201708 VALUES LESS THAN ('2017-09-01') -) -DISTRIBUTED BY HASH(siteid) BUCKETS 10 -PROPERTIES("replication_num" = "1"); -``` - -After the table is built, you can view the information of the table in example_db: - -``` -MySQL> SHOW TABLES; -+----------------------+ -| Tables_in_example_db | -+----------------------+ -| table1 | -| table2 | -+----------------------+ -2 rows in set (0.01 sec) - -MySQL> DESC table1; -+----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-------+---------+-------+ -| siteid | int(11) | Yes | true | 10 | | -| citycode | smallint(6) | Yes | true | N/A | | -| username | varchar(32) | Yes | true | | | -| pv | bigint(20) | Yes | false | 0 | SUM | -+----------+-------------+------+-------+---------+-------+ -4 rows in set (0.00 sec) - -MySQL> DESC table2; -+-----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+-----------+-------------+------+-------+---------+-------+ -| event_day | date | Yes | true | N/A | | -| siteid | int(11) | Yes | true | 10 | | -| citycode | smallint(6) | Yes | true | N/A | | -| username | varchar(32) | Yes | true | | | -| pv | bigint(20) | Yes | false | 0 | SUM | -+-----------+-------------+------+-------+---------+-------+ -5 rows in set (0.00 sec) -``` - -> Notes: -> -> 1. By setting replication_num, the above tables are all single-copy tables. Doris recommends that users adopt the default three-copy settings to ensure high availability. -> 2. Composite partition tables can be added or deleted dynamically. See the Partition section in `HELP ALTER TABLE`. -> 3. Data import can import the specified Partition. See `HELP LOAD`. -> 4. Schema of table can be dynamically modified. -> 5. Rollup can be added to Table to improve query performance. This section can be referred to the description of Rollup in Advanced Usage Guide. -> 6. The default value of Null property for column is true, which may result in poor scan performance. - -### 2.4 Import data - -Doris supports a variety of data import methods. Specifically, you can refer to the data import document. Here we use streaming import and Broker import as examples. - -#### Flow-in - -Streaming import transfers data to Doris via HTTP protocol. It can import local data directly without relying on other systems or components. Detailed grammar help can be found in `HELP STREAM LOAD;` - -Example 1: With "table1_20170707" as Label, import table1 tables using the local file table1_data. - -``` -curl --location-trusted -u test:test_passwd -H "label:table1_20170707" -H "column_separator:," -T table1_data http://FE_HOST:8030/api/example_db/table1/_stream_load -``` - -> 1. FE_HOST is the IP of any FE node and 8030 is http_port in fe.conf. -> 2. You can use the IP of any BE and the webserver_port in be.conf to connect the target left and right for import. 
For example: `BE_HOST:8040` - -The local file `table1_data` takes `,` as the separation between data, and the specific contents are as follows: - -``` -1,1,Jim,2 -2,1,grace,2 -3,2,tom,2 -4,3,bush,3 -5,3,helen,3 -``` - -Example 2: With "table2_20170707" as Label, import table2 tables using the local file table2_data. - -``` -curl --location-trusted -u test:test -H "label:table2_20170707" -H "column_separator:|" -T table2_data http://127.0.0.1:8030/api/example_db/table2/_stream_load -``` - -The local file `table2_data`is separated by `|`. The details are as follows: - -``` -2017-07-03|1|1|jim|2 -2017-07-05|2|1|grace|2 -2017-07-12|3|2|tom|2 -2017-07-15|4|3|bush|3 -2017-07-12|5|3|helen|3 -``` - -> Notes: -> -> 1. The recommended file size for streaming import is limited to 10GB. Excessive file size will result in higher cost of retry failure. -> 2. Each batch of imported data needs to take a Label. Label is best a string related to a batch of data for easy reading and management. Doris based on Label guarantees that the same batch of data can be imported only once in a database. Label for failed tasks can be reused. -> 3. Streaming imports are synchronous commands. The successful return of the command indicates that the data has been imported, and the failure of the return indicates that the batch of data has not been imported. - -#### Broker Load - -Broker imports import data from external storage through deployed Broker processes. For more help, see `HELP BROKER LOAD;` - -Example: Import files on HDFS into table1 table with "table1_20170708" as Label - -``` -LOAD LABEL table1_20170708 -( - DATA INFILE("hdfs://your.namenode.host:port/dir/table1_data") - INTO TABLE table1 -) -WITH BROKER hdfs -( - "username"="hdfs_user", - "password"="hdfs_password" -) -PROPERTIES -( - "timeout"="3600", - "max_filter_ratio"="0.1" -); -``` - -Broker imports are asynchronous commands. Successful execution of the above commands only indicates successful submission of tasks. Successful imports need to be checked through `SHOW LOAD;' Such as: - -`SHOW LOAD WHERE LABEL = "table1_20170708";` - -In the return result, FINISHED in the `State` field indicates that the import was successful. 
- -For more instructions on `SHOW LOAD`, see `HELP SHOW LOAD;` - -Asynchronous import tasks can be cancelled before they finish: - -`CANCEL LOAD WHERE LABEL = "table1_20170708";` - -## 3 Data query - -### 3.1 Simple Query - -Examples: - -``` -MySQL> SELECT * FROM table1 LIMIT 3; -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 2 | 1 | 'grace' | 2 | -| 5 | 3 | 'helen' | 3 | -| 3 | 2 | 'tom' | 2 | -+--------+----------+----------+------+ -3 rows in set (0.01 sec) - -MySQL> SELECT * FROM table1 ORDER BY citycode; -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 2 | 1 | 'grace' | 2 | -| 1 | 1 | 'jim' | 2 | -| 3 | 2 | 'tom' | 2 | -| 4 | 3 | 'bush' | 3 | -| 5 | 3 | 'helen' | 3 | -+--------+----------+----------+------+ -5 rows in set (0.01 sec) -``` - -### 3.2 Join Query - -Examples: - -``` -MySQL> SELECT SUM(table1.pv) FROM table1 JOIN table2 WHERE table1.siteid = table2.siteid; -+--------------------+ -| sum(`table1`.`pv`) | -+--------------------+ -| 12 | -+--------------------+ -1 row in set (0.20 sec) -``` - -### 3.3 Subquery - -Examples: - -``` -MySQL> SELECT SUM(pv) FROM table2 WHERE siteid IN (SELECT siteid FROM table1 WHERE siteid > 2); -+-----------+ -| sum(`pv`) | -+-----------+ -| 8 | -+-----------+ -1 row in set (0.13 sec) -``` diff --git a/docs/en/getting-started/best-practice.md b/docs/en/getting-started/best-practice.md deleted file mode 100644 index 930bdb3a86..0000000000 --- a/docs/en/getting-started/best-practice.md +++ /dev/null @@ -1,198 +0,0 @@ ---- -{ - "title": "Best Practices", - "language": "en" -} ---- - - - - -# Best Practices - -## 1 Table Creation - -### 1.1 Data Model Selection - -The Doris data model is currently divided into three categories: AGGREGATE KEY, UNIQUE KEY, and DUPLICATE KEY. Data in all three models is sorted by KEY. - -1.1.1. AGGREGATE KEY - -When the AGGREGATE KEY is the same, old and new records are aggregated. The aggregation functions currently supported are SUM, MIN, MAX, and REPLACE. - -The AGGREGATE KEY model can aggregate data in advance and is suitable for reporting and multi-dimensional analysis business. - -``` -CREATE TABLE site_visit -( -siteid INT, -city SMALLINT, -username VARCHAR(32), -pv BIGINT SUM DEFAULT '0' -) -AGGREGATE KEY(siteid, city, username) -DISTRIBUTED BY HASH(siteid) BUCKETS 10; -``` - -1.1.2. UNIQUE KEY - -When the UNIQUE KEY is the same, the new record overwrites the old record. At present, UNIQUE KEY is implemented with the same REPLACE aggregation method as AGGREGATE KEY, and they are essentially the same. It is suitable for analytical business with update requirements. - -``` -CREATE TABLE sales_order -( -orderid BIGINT, -status TINYINT, -username VARCHAR(32), -amount BIGINT DEFAULT '0' -) -UNIQUE KEY(orderid) -DISTRIBUTED BY HASH(orderid) BUCKETS 10; -``` - -1.1.3. DUPLICATE KEY - -Only sort columns are specified, and identical rows are not merged. It is suitable for analysis business where data does not need to be aggregated in advance. - -``` -CREATE TABLE session_data -( -visitorid SMALLINT, -sessionid BIGINT, -visittime DATETIME, -city CHAR(20), -province CHAR(20), -ip VARCHAR(32), -browser CHAR(20), -url VARCHAR(1024) -) -DUPLICATE KEY(visitorid, sessionid) -DISTRIBUTED BY HASH(sessionid, visitorid) BUCKETS 10; -``` - -### 1.2 Wide Table vs.
Star Schema - -In order to adapt to front-end business, the business side often does not distinguish dimension information from indicator information, but defines the schema as one wide table. For Doris, the performance of such wide tables is often unsatisfactory: - -* There are many fields in the schema, and there may be more key columns in the aggregation model; the number of columns that need to be sorted during import increases accordingly. -* Dimension information updates are reflected in the whole table, and the frequency of updates directly affects query efficiency. - -Users are advised to use a star schema to distinguish dimension tables from indicator (fact) tables as much as possible. Frequently updated dimension tables can be placed in MySQL external tables; if there are only a few updates, they can be placed directly in Doris. When storing dimension tables in Doris, more replicas can be configured for them to improve join performance. - -### 1.3 Partitioning and Bucketing - -Doris supports two-level partitioned storage. The first level is the partition, which currently supports both RANGE and LIST partition types, and the second level is the HASH bucket. - -1.3.1. Partitioning - -The partition is used to divide data into different intervals, which can be logically understood as dividing the original table into multiple sub-tables. Data can be easily managed by partition, for example, to delete data more quickly. - -1.3.1.1. Range Partitioning - -In business, most users will choose to partition on time, which has the following advantages: - -* Hot and cold data can be distinguished -* Doris hierarchical storage (SSD + SATA) can be used - -1.3.1.2. List Partitioning - -In business, users can select cities or other enumeration values as partitions. - -1.3.2. Hash Bucketing - -The data is divided into different buckets according to the hash value. - -* It is suggested that columns with high differentiation be used as bucket columns to avoid data skew. -* To facilitate data recovery, it is suggested that a single bucket not be too large and be kept within 10GB. Therefore, the number of buckets should be considered reasonably when building tables or adding partitions; different partitions can specify different numbers of buckets. - -### 1.4 Sparse Index and Bloom Filter - -Doris stores data in an ordered manner and builds a sparse index on top of the ordered data. The index granularity is a block (1024 rows). - -The sparse index uses a fixed-length prefix of the schema as the index content; Doris currently uses a 36-byte prefix as the index. - -* When building tables, it is suggested that the filter fields commonly used in queries be placed at the front of the schema. The more distinguishable a field is and the more frequently it is queried, the further forward it should be placed. -* A special case is the VARCHAR type field: a VARCHAR field can only be the last field of the sparse index. The index is truncated at the VARCHAR column, so if a VARCHAR column appears near the front, the index may be shorter than 36 bytes. Specifically, you can refer to [Data Model, ROLLUP and Prefix Index](./data-model-rollup.md). -* In addition to the sparse index, Doris also provides a bloom filter index. The bloom filter index has an obvious filtering effect on columns with high discrimination. If a VARCHAR column cannot be placed in the sparse index, you can create a bloom filter index for it, as sketched below.
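A hedged sketch of declaring a bloom filter index: the `bloom_filter_columns` table property can be set either when creating a table or afterwards with `ALTER TABLE`. The table and column names below reuse the `session_data` example from section 1.1.3 and are illustrative only.

```
-- Add a bloom filter index on the high-cardinality column `url` of session_data.
ALTER TABLE session_data SET ("bloom_filter_columns" = "url");

-- Alternatively, declare it at table creation time:
-- PROPERTIES ("bloom_filter_columns" = "url,ip")
```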
 - -### 1.5 Materialized View (Rollup) - -Rollup can essentially be understood as a physical index of the original table. When creating a Rollup, only some of the columns in the Base table are selected as its schema, and the order of the fields in that schema can also differ from that of the Base table. - -Rollup can be considered in the following cases: - -1.5.1. Low ratio of data aggregation in the Base table - -This is usually due to the fact that the Base table has highly differentiated fields. At this point, you can consider selecting some columns and establishing a Rollup. - -For the `site_visit` table: - -``` -site_visit(siteid, city, username, pv) -``` - -The siteid column may lead to a low degree of data aggregation. If the business side often needs PV statistics by city, a Rollup containing only city and pv can be built: - -``` -ALTER TABLE site_visit ADD ROLLUP rollup_city(city, pv); -``` - -1.5.2. The prefix index in the Base table cannot be hit - -Generally, the way the Base table is constructed cannot cover all query patterns. At this point, you can consider adjusting the column order and establishing a Rollup. - -For the `session_data` table: - -``` -session_data(visitorid, sessionid, visittime, city, province, ip, browser, url) -``` - -If, in addition to analysis by visitorid, there are cases that analyze by browser and province, a separate Rollup can be established: - -``` -ALTER TABLE session_data ADD ROLLUP rollup_browser(browser,province,ip,url) DUPLICATE KEY(browser,province); -``` - -## 2 Schema Change - -There are three types of Schema Change in Doris: Sorted Schema Change, Direct Schema Change, and Linked Schema Change. - -2.1. Sorted Schema Change - -The sort order of columns is changed and the data needs to be reordered, for example when deleting a column from the sort key. - -``` -ALTER TABLE site_visit DROP COLUMN city; -``` - -2.2. Direct Schema Change: there is no need to reorder the data, but the data needs to be converted, for example when modifying the type of a column or adding a column to the sparse index. - -``` -ALTER TABLE site_visit MODIFY COLUMN username varchar(64); -``` - -2.3. Linked Schema Change: there is no need to transform the data at all, for example when adding a new column. - -``` -ALTER TABLE site_visit ADD COLUMN click bigint SUM default '0'; -``` - -It is recommended to consider the schema carefully when creating tables, so that schema changes can be performed more quickly. diff --git a/docs/en/getting-started/data-model-rollup.md b/docs/en/getting-started/data-model-rollup.md deleted file mode 100644 index d70e064eb4..0000000000 --- a/docs/en/getting-started/data-model-rollup.md +++ /dev/null @@ -1,636 +0,0 @@ ---- -{ - "title": "Data Model, ROLLUP and Prefix Index", - "language": "en" -} ---- - - - -# Data Model, ROLLUP and Prefix Index - -This document describes Doris's data model, ROLLUP and prefix index concepts at the logical level to help users better use Doris to cope with different business scenarios. - -## Basic concepts - -In Doris, data is logically described in the form of tables. -A table consists of rows and columns. A row is a row of user data. A column is used to describe different fields in a row of data. - -Columns can be divided into two categories: Key and Value. From a business perspective, Key and Value can correspond to dimension columns and indicator columns, respectively. - -Doris's data model is divided into three main categories: - -* Aggregate -* Unique -* Duplicate - -Let's introduce them separately. - -## Aggregate Model - -We illustrate what the aggregation model is and how to use it correctly with practical examples.
- -### Example 1: Importing data aggregation - -Assume that the business has the following data table schema: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -| userid | LARGEINT | | user id| -| date | DATE | | date of data filling| -| City | VARCHAR (20) | | User City| -| age | SMALLINT | | User age| -| sex | TINYINT | | User gender| -| Last_visit_date | DATETIME | REPLACE | Last user access time| -| Cost | BIGINT | SUM | Total User Consumption| -| max dwell time | INT | MAX | Maximum user residence time| -| min dwell time | INT | MIN | User minimum residence time| - -If converted into a table-building statement, the following is done (omitting the Partition and Distribution information in the table-building statement) - -``` -CREATE TABLE IF NOT EXISTS example_db.expamle_tbl -( - `user_id` LARGEINT NOT NULL COMMENT "user id", - `date` DATE NOT NULL COMMENT "data import time", - `city` VARCHAR(20) COMMENT "city", - `age` SMALLINT COMMENT "age", - `sex` TINYINT COMMENT "gender", - `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "last visit date time", - `cost` BIGINT SUM DEFAULT "0" COMMENT "user total cost", - `max_dwell_time` INT MAX DEFAULT "0" COMMENT "user max dwell time", - `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "user min dwell time" -) -AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`) -... /* ignore Partition and Distribution */ -; -``` - -As you can see, this is a typical fact table of user information and access behavior. -In general star model, user information and access behavior are stored in dimension table and fact table respectively. Here, in order to explain Doris's data model more conveniently, we store the two parts of information in a single table. - -The columns in the table are divided into Key (dimension column) and Value (indicator column) according to whether `AggregationType`is set or not. No `AggregationType`, such as `user_id`, `date`, `age`, etc., is set as **Key**, while Aggregation Type is set as **Value**. - -When we import data, the same rows and aggregates into one row for the Key column, while the Value column aggregates according to the set `AggregationType`. `AggregationType`currently has the following four ways of aggregation: - -1. SUM: Sum, multi-line Value accumulation. -2. REPLACE: Instead, Values in the next batch of data will replace Values in rows previously imported. -3. MAX: Keep the maximum. -4. MIN: Keep the minimum. - -Suppose we have the following imported data (raw data): - -|user\_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10| -| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2| -| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| -| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| -| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| -| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| -| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| - -Let's assume that this is a table that records the user's behavior in accessing a commodity page. 
Let's take the first row of data as an example and explain it as follows: - -| Data | Description| -|---|---| -| 10000 | User id, each user uniquely identifies id| -| 2017-10-01 | Data storage time, accurate to date| -| Beijing | User City| -| 20 | User Age| -| 0 | Gender male (1 for female)| -| 2017-10-01 06:00 | User's time to visit this page, accurate to seconds| -| 20 | Consumption generated by the user's current visit| -| 10 | User's visit, time to stay on the page| -| 10 | User's current visit, time spent on the page (redundancy)| - -Then when this batch of data is imported into Doris correctly, the final storage in Doris is is as follows: - -|user\_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 35 | 10 | 2| -| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| -| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| -| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| -| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| -| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| - -As you can see, there is only one line of aggregated data left for 10,000 users. The data of other users are consistent with the original data. Here we first explain the aggregated data of user 10000: - -The first five columns remain unchanged, starting with column 6 `last_visit_date': - -* `2017-10-01 07:00`: Because the `last_visit_date`column is aggregated by REPLACE, the `2017-10-01 07:00` column has been replaced by `2017-10-01 06:00'. -> Note: For data in the same import batch, the order of replacement is not guaranteed for the aggregation of REPLACE. For example, in this case, it may be `2017-10-01 06:00'. For data from different imported batches, it can be guaranteed that the data from the latter batch will replace the former batch. - -* `35`: Because the aggregation type of the `cost'column is SUM, 35 is accumulated from 20 + 15. -* `10`: Because the aggregation type of the`max_dwell_time'column is MAX, 10 and 2 take the maximum and get 10. -* `2`: Because the aggregation type of `min_dwell_time'column is MIN, 10 and 2 take the minimum value and get 2. - -After aggregation, Doris ultimately only stores aggregated data. In other words, detailed data will be lost and users can no longer query the detailed data before aggregation. - -### Example 2: Keep detailed data - -Following example 1, we modify the table structure as follows: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -| userid | LARGEINT | | user id| -| date | DATE | | date of data filling| -| Time stamp | DATETIME | | Data filling time, accurate to seconds| -| City | VARCHAR (20) | | User City| -| age | SMALLINT | | User age| -| sex | TINYINT | | User gender| -| Last visit date | DATETIME | REPLACE | Last user access time| -| Cost | BIGINT | SUM | Total User Consumption| -| max dwell time | INT | MAX | Maximum user residence time| -| min dwell time | INT | MIN | User minimum residence time| - -That is to say, a column of `timestamp` has been added to record the data filling time accurate to seconds. 
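A possible table-building statement for this adjusted schema, shown as a sketch only (it simply extends the statement from Example 1 by adding the `timestamp` column to the AGGREGATE KEY; the Partition and Distribution clauses are still omitted as before):

```
CREATE TABLE IF NOT EXISTS example_db.expamle_tbl
(
    `user_id` LARGEINT NOT NULL COMMENT "user id",
    `date` DATE NOT NULL COMMENT "data import time",
    `timestamp` DATETIME NOT NULL COMMENT "data import time, accurate to seconds",
    `city` VARCHAR(20) COMMENT "city",
    `age` SMALLINT COMMENT "age",
    `sex` TINYINT COMMENT "gender",
    `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "last visit date time",
    `cost` BIGINT SUM DEFAULT "0" COMMENT "user total cost",
    `max_dwell_time` INT MAX DEFAULT "0" COMMENT "user max dwell time",
    `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "user min dwell time"
)
AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)
... /* ignore Partition and Distribution */
;
```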
- -The imported data are as follows: - -|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---|---| -| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10| -| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2| -| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| -| 10002 | 2017-10-02 | 2017-10-02 13:10:00 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| -| 10003 | 2017-10-02 | 2017-10-02 13:15:00 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| -| 10004 | 2017-10-01 | 2017-10-01 12:12:48 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| -| 10004 | 2017-10-03 | 2017-10-03 12:38:20 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| - -Then when this batch of data is imported into Doris correctly, the final storage in Doris is is as follows: - -|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---|---| -| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10| -| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2| -| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| -| 10002 | 2017-10-02 | 2017-10-02 13:10:00 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| -| 10003 | 2017-10-02 | 2017-10-02 13:15:00 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| -| 10004 | 2017-10-01 | 2017-10-01 12:12:48 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| -| 10004 | 2017-10-03 | 2017-10-03 12:38:20 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| - -We can see that the stored data, just like the imported data, does not aggregate at all. This is because, in this batch of data, because the `timestamp` column is added, the Keys of all rows are **not exactly the same**. That is, as long as the keys of each row are not identical in the imported data, Doris can save the complete detailed data even in the aggregation model. - -### Example 3: Importing data and aggregating existing data - -Take Example 1. 
Suppose that the data in the table are as follows: - -|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 35 | 10 | 2| -| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| -| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| -| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| -| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| -| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| - -We imported a new batch of data: - -|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 11:22:00 | 44 | 19 | 19| -| 10005 | 2017-10-03 | Changsha | 29 | 1 | 2017-10-03 18:11:02 | 3 | 1 | 1| - -Then when this batch of data is imported into Doris correctly, the final storage in Doris is is as follows: - -|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -| 10000 | 2017-10-01 | Beijing | 20 | 0 | 2017-10-01 07:00 | 35 | 10 | 2| -| 10001 | 2017-10-01 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| -| 10002 | 2017-10-02 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| -| 10003 | 2017-10-02 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| -| 10004 | 2017-10-01 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| -| 10004 | 2017-10-03 | Shenzhen | 35 | 0 | 2017-10-03 11:22:00 | 55 | 19 | 6| -| 10005 | 2017-10-03 | Changsha | 29 | 1 | 2017-10-03 18:11:02 | 3 | 1 | 1| - -As you can see, the existing data and the newly imported data of user 10004 have been aggregated. At the same time, 10005 new user's data were added. - -Data aggregation occurs in Doris in the following three stages: - -1. The ETL stage of data import for each batch. This phase aggregates data within each batch of imported data. -2. The stage in which the underlying BE performs data Compaction. At this stage, BE aggregates data from different batches that have been imported. -3. Data query stage. In data query, the data involved in the query will be aggregated accordingly. - -Data may be aggregated to varying degrees at different times. For example, when a batch of data is just imported, it may not be aggregated with the existing data. But for users, user **can only query aggregated data**. That is, different degrees of aggregation are transparent to user queries. Users should always assume that data exists in terms of the degree of aggregation that **ultimately completes**, and **should not assume that some aggregation has not yet occurred**. (See the section **Limitations of the aggregation model** for more details.) - -## Unique Model - -In some multi-dimensional analysis scenarios, users are more concerned with how to ensure the uniqueness of Key, that is, how to obtain the Primary Key uniqueness constraint. Therefore, we introduce Unique's data model. This model is essentially a special case of aggregation model and a simplified representation of table structure. Let's give an example. 
|ColumnName|Type|IsKey|Comment| -|---|---|---|---| -| user_id | BIGINT | Yes | user id| -| username | VARCHAR (50) | Yes | User nickname| -| city | VARCHAR (20) | No | User City| -| age | SMALLINT | No | User Age| -| sex | TINYINT | No | User Gender| -| phone | LARGEINT | No | User Phone| -| address | VARCHAR (500) | No | User Address| -| register_time | DATETIME | No | user registration time| - -This is a typical user basic information table. There is no aggregation requirement for this type of data; only the uniqueness of the primary key needs to be guaranteed. (The primary key here is user_id + username.) Then the table-building statement is as follows: - -``` -CREATE TABLE IF NOT EXISTS example_db.expamle_tbl -( -`user_id` LARGEINT NOT NULL COMMENT "user id", -`username` VARCHAR(50) NOT NULL COMMENT "user nickname", -`city` VARCHAR(20) COMMENT "user city", -`age` SMALLINT COMMENT "user age", -`sex` TINYINT COMMENT "user gender", -`phone` LARGEINT COMMENT "user phone", -`address` VARCHAR(500) COMMENT "user address", -`register_time` DATETIME COMMENT "user registration time" -) -UNIQUE KEY(`user_id`, `username`) -... /* ignore Partition and Distribution */ -; -``` - -This table structure is exactly the same as the following table structure described by the aggregation model: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -| user_id | BIGINT | | user id| -| username | VARCHAR (50) | | User nickname| -| City | VARCHAR (20) | REPLACE | User City| -| age | SMALLINT | REPLACE | User Age| -| sex | TINYINT | REPLACE | User Gender| -| Phone | LARGEINT | REPLACE | User Phone| -| address | VARCHAR (500) | REPLACE | User Address| -| register_time | DATETIME | REPLACE | User registration time| - -And the table-building statement: - -``` -CREATE TABLE IF NOT EXISTS example_db.expamle_tbl -( -`user_id` LARGEINT NOT NULL COMMENT "user id", -`username` VARCHAR(50) NOT NULL COMMENT "user nickname", -`city` VARCHAR(20) REPLACE COMMENT "user city", -`age` SMALLINT REPLACE COMMENT "user age", -`sex` TINYINT REPLACE COMMENT "user gender", -`phone` LARGEINT REPLACE COMMENT "user phone", -`address` VARCHAR(500) REPLACE COMMENT "user address", -`register_time` DATETIME REPLACE COMMENT "user registration time" -) -AGGREGATE KEY(`user_id`, `username`) -... /* ignore Partition and Distribution */ -; -``` - -That is to say, the Unique model can be completely replaced by REPLACE in the aggregation model. Its internal implementation and data storage are exactly the same. No further examples are given here. - -## Duplicate Model - -In some multidimensional analysis scenarios, data has neither primary keys nor aggregation requirements. Therefore, we introduce the Duplicate data model to meet this kind of demand. An example follows. - -|ColumnName|Type|SortKey|Comment| -|---|---|---|---| -| timestamp | DATETIME | Yes | Logging Time| -| type | INT | Yes | Log Type| -|error_code|INT|Yes|error code| -| error_msg | VARCHAR (1024) | No | Error Details| -|op_id|BIGINT|No|operator id| -|op_time|DATETIME|No|operation time| - -The TABLE statement is as follows: -``` -CREATE TABLE IF NOT EXISTS example_db.expamle_tbl -( -`timestamp` DATETIME NOT NULL COMMENT "log time", -`type` INT NOT NULL COMMENT "log type", -`error_code` INT COMMENT "error code", -`error_msg` VARCHAR(1024) COMMENT "error detail message", -`op_id` BIGINT COMMENT "operator id", -`op_time` DATETIME COMMENT "operation time" -) -DUPLICATE KEY(`timestamp`, `type`) -... /* ignore Partition and Distribution */ -; -``` - -This data model is different from Aggregate and Unique models.
Data is stored entirely in accordance with the data in the imported file, without any aggregation. Even if the two rows of data are identical, they will be retained. -The DUPLICATE KEY specified in the table building statement is only used to specify which columns the underlying data is sorted according to. (The more appropriate name should be "Sorted Column", where the name "DUPLICATE KEY" is used to specify the data model used. For more explanations of "Sorted Column", see the section [Prefix Index](https://doris.apache.org/getting-started/data-model-rollup.html#prefix-index). On the choice of DUPLICATE KEY, we recommend that the first 2-4 columns be selected appropriately. - -This data model is suitable for storing raw data without aggregation requirements and primary key uniqueness constraints. For more usage scenarios, see the [Limitations of the Aggregation Model](https://doris.apache.org/getting-started/data-model-rollup.html#limitations-of-aggregation-model) section. - -## ROLLUP - -ROLLUP in multidimensional analysis means "scroll up", which means that data is aggregated further at a specified granularity. - -### Basic concepts - -In Doris, we make the table created by the user through the table building statement a Base table. Base table holds the basic data stored in the way specified by the user's table-building statement. - -On top of the Base table, we can create any number of ROLLUP tables. These ROLLUP data are generated based on the Base table and physically **stored independently**. - -The basic function of ROLLUP tables is to obtain coarser aggregated data on the basis of Base tables. - -Let's illustrate the ROLLUP tables and their roles in different data models with examples. - -#### ROLLUP in Aggregate Model and Unique Model - -Because Unique is only a special case of the Aggregate model, we do not distinguish it here. 
- -Example 1: Get the total consumption per user - -Following **Example 2** in the **Aggregate Model** section, the Base table structure is as follows: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -| user_id | LARGEINT | | user id| -| date | DATE | | date of data filling| -| Time stamp | DATETIME | | Data filling time, accurate to seconds| -| City | VARCHAR (20) | | User City| -| age | SMALLINT | | User age| -| sex | TINYINT | | User gender| -| Last_visit_date | DATETIME | REPLACE | Last user access time| -| Cost | BIGINT | SUM | Total User Consumption| -| max dwell time | INT | MAX | Maximum user residence time| -| min dwell time | INT | MIN | User minimum residence time| - -The data stored are as follows: - -|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---|---| -| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing | 20 | 0 | 2017-10-01 06:00 | 20 | 10 | 10| -| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Beijing | 20 | 0 | 2017-10-01 07:00 | 15 | 2 | 2| -| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Beijing | 30 | 1 | 2017-10-01 17:05:45 | 2 | 22 | 22| -| 10002 | 2017-10-02 | 2017-10-02 13:10:00 | Shanghai | 20 | 1 | 2017-10-02 12:59:12 | 200 | 5 | 5| -| 10003 | 2017-10-02 | 2017-10-02 13:15:00 | Guangzhou | 32 | 0 | 2017-10-02 11:20:00 | 30 | 11 | 11| -| 10004 | 2017-10-01 | 2017-10-01 12:12:48 | Shenzhen | 35 | 0 | 2017-10-01 10:00:15 | 100 | 3 | 3| -| 10004 | 2017-10-03 | 2017-10-03 12:38:20 | Shenzhen | 35 | 0 | 2017-10-03 10:20:22 | 11 | 6 | 6| - -On this basis, we create a ROLLUP: - -|ColumnName| -|---| -|user_id| -|cost| - -The ROLLUP contains only two columns: user_id and cost. After the creation, the data stored in the ROLLUP is as follows: - -|user\_id|cost| -|---|---| -|10000|35| -|10001|2| -|10002|200| -|10003|30| -|10004|111| - -As you can see, ROLLUP retains only the results of SUM on the cost column for each user_id. So when we do the following query: - -`SELECT user_id, sum(cost) FROM table GROUP BY user_id;` - -Doris automatically hits the ROLLUP table, thus completing the aggregated query by scanning only a very small amount of data. - -2. Example 2: Get the total consumption, the longest and shortest page residence time of users of different ages in different cities - -Follow example 1. Based on the Base table, we create a ROLLUP: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -| City | VARCHAR (20) | | User City| -| age | SMALLINT | | User age| -| Cost | BIGINT | SUM | Total User Consumption| -| max dwell time | INT | MAX | Maximum user residence time| -| min dwell time | INT | MIN | User minimum residence time| - -After the creation, the data stored in the ROLLUP is as follows: - -|city|age|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---| -| Beijing | 20 | 35 | 10 | 2| -| Beijing | 30 | 2 | 22 | 22| -| Shanghai | 20 | 200 | 5 | 5| -| Guangzhou | 32 | 30 | 11 | 11| -| Shenzhen | 35 | 111 | 6 | 3| - -When we do the following queries: - -* `SELECT city, age, sum(cost), max(max_dwell_time), min(min_dwell_time) FROM table GROUP BY city, age;` -* `SELECT city, sum(cost), max(max_dwell_time), min(min_dwell_time) FROM table GROUP BY city;` -* `SELECT city, age, sum(cost), min(min_dwell_time) FROM table GROUP BY city, age;` - -Doris automatically hits the ROLLUP table. - -#### ROLLUP in Duplicate Model - -Because the Duplicate model has no aggregate semantics. So the ROLLLUP in this model has lost the meaning of "scroll up". 
It's just to adjust the column order in order to hit the prefix index. In the next section, we introduce the prefix index in detail, and how to use ROLLUP to change the prefix index in order to achieve better query efficiency. - -### Prefix Index and ROLLUP - -#### prefix index - -Unlike traditional database designs, Doris does not support creating indexes on arbitrary columns. OLAP databases with an MPP architecture, such as Doris, usually handle large amounts of data by increasing concurrency. -In essence, Doris's data is stored in a data structure similar to an SSTable (Sorted String Table). This is an ordered data structure that can be sorted and stored according to specified columns. In such a data structure, searching by the sort columns is very efficient. - -In the three data models Aggregate, Unique and Duplicate, the underlying data is sorted and stored according to the columns specified in AGGREGATE KEY, UNIQUE KEY and DUPLICATE KEY in their respective table-building statements. - -The prefix index, which is based on this sorting, is an index method for quickly querying data according to a given prefix column. - -We use the first **36 bytes** of a row of data as its prefix index. When a VARCHAR type is encountered, the prefix index is truncated directly. Examples: - -1. The prefix index of the following table structure is user_id (8 Bytes) + age (4 Bytes) + message (prefix 20 Bytes). - -|ColumnName|Type| -|---|---| -|user_id|BIGINT| -|age|INT| -|message|VARCHAR(100)| -|max\_dwell\_time|DATETIME| -|min\_dwell\_time|DATETIME| - -2. The prefix index of the following table structure is user_name (20 Bytes). Even though it does not reach 36 bytes, because it encounters VARCHAR, the index is truncated directly and does not continue further. - -|ColumnName|Type| -|---|---| -|user_name|VARCHAR(20)| -|age|INT| -|message|VARCHAR(100)| -|max\_dwell\_time|DATETIME| -|min\_dwell\_time|DATETIME| - -When our query condition is a prefix of the **prefix index**, the query can be greatly sped up. For example, in the first example, we execute the following query: - -`SELECT * FROM table WHERE user_id=1829239 and age=20;` - -The efficiency of this query is **much higher than that of** the following query: - -`SELECT * FROM table WHERE age=20;` - -Therefore, when constructing tables, **correctly choosing the column order can greatly improve query efficiency**. - -#### ROLLUP adjusts prefix index - -Because the column order is specified when a table is built, there is only one prefix index per table. This may be inefficient for queries that filter on columns that cannot hit the prefix index. Therefore, we can manually adjust the column order by creating a ROLLUP. An example follows. - -The structure of the Base table is as follows: - -|ColumnName|Type| -|---|---| -|user\_id|BIGINT| -|age|INT| -|message|VARCHAR(100)| -|max\_dwell\_time|DATETIME| -|min\_dwell\_time|DATETIME| - -On this basis, we can create a ROLLUP table: - -|ColumnName|Type| -|---|---| -|age|INT| -|user\_id|BIGINT| -|message|VARCHAR(100)| -|max\_dwell\_time|DATETIME| -|min\_dwell\_time|DATETIME| - -As you can see, the columns of the ROLLUP and Base tables are exactly the same; only the order of user_id and age is changed. So when we do the following query: - -`SELECT * FROM table where age=20 and message LIKE "%error%";` - -The ROLLUP table is preferred because the prefix index of the ROLLUP matches better.
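To make the column-order adjustment concrete, here is a hedged sketch; the table name `example_tbl` and the rollup name are illustrative and assume the five-column structure shown above. The `EXPLAIN` check mentioned in the next section can then confirm whether the rollup is hit.

```
-- Illustrative: create a rollup that puts age first, so that queries filtering on
-- age can use the rollup's prefix index instead of the base table's.
ALTER TABLE example_tbl
ADD ROLLUP rollup_age_user(age, user_id, message, max_dwell_time, min_dwell_time);

-- Inspect the execution plan to see whether the rollup is hit.
EXPLAIN SELECT * FROM example_tbl WHERE age = 20 AND message LIKE "%error%";
```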
- -### Some Explanations of ROLLUP - -* The fundamental role of ROLLUP is to improve the query efficiency of some queries (whether by aggregating to reduce the amount of data or by modifying column order to match prefix indexes). Therefore, the meaning of ROLLUP has gone beyond the scope of "roll-up". That's why we named it Materialized Index in the source code. -* ROLLUP is attached to the Base table and can be seen as an auxiliary data structure of the Base table. Users can create or delete ROLLUP based on the Base table, but cannot explicitly specify a query for a ROLLUP in the query. Whether ROLLUP is hit or not is entirely determined by the Doris system. -* ROLLUP data is stored in separate physical storage. Therefore, the more ROLLUP you create, the more disk space you occupy. It also has an impact on the speed of import (the ETL phase of import automatically generates all ROLLUP data), but it does not reduce query efficiency (only better). -* Data updates for ROLLUP are fully synchronized with Base representations. Users need not care about this problem. -* Columns in ROLLUP are aggregated in exactly the same way as Base tables. There is no need to specify or modify ROLLUP when creating it. -* A necessary (inadequate) condition for a query to hit ROLLUP is that **all columns** (including the query condition columns in select list and where) involved in the query exist in the column of the ROLLUP. Otherwise, the query can only hit the Base table. -* Certain types of queries (such as count(*)) cannot hit ROLLUP under any conditions. See the next section **Limitations of the aggregation model**. -* The query execution plan can be obtained by `EXPLAIN your_sql;` command, and in the execution plan, whether ROLLUP has been hit or not can be checked. -* Base tables and all created ROLLUP can be displayed by `DESC tbl_name ALL;` statement. - -In this document, you can see [Query how to hit Rollup](hit-the-rollup) - -## Limitations of aggregation model - -Here we introduce the limitations of Aggregate model (including Unique model). - -In the aggregation model, what the model presents is the aggregated data. That is to say, any data that has not yet been aggregated (for example, two different imported batches) must be presented in some way to ensure consistency. Let's give an example. - -The hypothesis table is structured as follows: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -| userid | LARGEINT | | user id| -| date | DATE | | date of data filling| -| Cost | BIGINT | SUM | Total User Consumption| - -Assume that there are two batches of data that have been imported into the storage engine as follows: - -**batch 1** - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|50| -|10002|2017-11-21|39| - -**batch 2** - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|1| -|10001|2017-11-21|5| -|10003|2017-11-22|22| - -As you can see, data belonging to user 10001 in two import batches has not yet been aggregated. However, in order to ensure that users can only query the aggregated data as follows: - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|51| -|10001|2017-11-21|5| -|10002|2017-11-21|39| -|10003|2017-11-22|22| - -We add aggregation operator to query engine to ensure data consistency. - -In addition, on the aggregate column (Value), when executing aggregate class queries that are inconsistent with aggregate types, attention should be paid to semantics. 
For example, in the example above, we execute the following queries: - -`SELECT MIN(cost) FROM table;` - -The result is 5, not 1. - -At the same time, this consistency guarantee will greatly reduce the query efficiency in some queries. - -Let's take the most basic count(*) query as an example: - -`SELECT COUNT(*) FROM table;` - -In other databases, such queries return results quickly. Because in the implementation, we can get the query result by counting rows at the time of import and saving count statistics information, or by scanning only a column of data to get count value at the time of query, with very little overhead. But in Doris's aggregation model, the overhead of this query **is very large**. - -Let's take the data as an example. - -**batch 1** - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|50| -|10002|2017-11-21|39| - -**batch 2** - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|1| -|10001|2017-11-21|5| -|10003|2017-11-22|22| - -Because the final aggregation result is: - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|51| -|10001|2017-11-21|5| -|10002|2017-11-21|39| -|10003|2017-11-22|22| - -So `select count(*) from table;` The correct result should be **4**. But if we only scan the `user_id`column and add query aggregation, the final result is **3** (10001, 10002, 10003). If aggregated without queries, the result is **5** (a total of five rows in two batches). It can be seen that both results are wrong. - -In order to get the correct result, we must read the data of `user_id` and `date`, and **together with aggregate** when querying, to return the correct result of **4**. That is to say, in the `count(*)` query, Doris must scan all AGGREGATE KEY columns (here are `user_id` and `date`) and aggregate them to get the semantically correct results. When aggregated columns are large, `count(*)` queries need to scan a large amount of data. - -Therefore, when there are frequent `count(*)` queries in the business, we recommend that users simulate `count(*)` by adding a column with a value of 1 and aggregation type of SUM. As the table structure in the previous example, we modify it as follows: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -| user ID | BIGINT | | user id| -| date | DATE | | date of data filling| -| Cost | BIGINT | SUM | Total User Consumption| -| count | BIGINT | SUM | for counting| - -Add a count column and import the data with the column value **equal to 1**. The result of `select count(*) from table;`is equivalent to `select sum(count) from table;` The query efficiency of the latter is much higher than that of the former. However, this method also has limitations, that is, users need to guarantee that they will not import rows with the same AGGREGATE KEY column repeatedly. Otherwise, `select sum(count) from table;`can only express the number of rows originally imported, not the semantics of `select count(*) from table;` - -Another way is to **change the aggregation type of the count column above to REPLACE, and still weigh 1**. Then`select sum(count) from table;` and `select count(*) from table;` the results will be consistent. And in this way, there is no restriction on importing duplicate rows. - -### Duplicate Model - -Duplicate model has no limitation of aggregation model. Because the model does not involve aggregate semantics, when doing count(*) query, we can get the correct semantics by choosing a column of queries arbitrarily. 
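Before moving on, here is a hedged sketch of the `count` workaround described earlier in this section. The table name `user_cost` and the DISTRIBUTED clause are illustrative additions, not part of the example schema; the `count` column is always imported with the value 1 and aggregated with SUM.

```
CREATE TABLE user_cost
(
    `user_id` BIGINT COMMENT "user id",
    `date`    DATE COMMENT "date of data filling",
    `cost`    BIGINT SUM DEFAULT "0" COMMENT "total user consumption",
    `count`   BIGINT SUM DEFAULT "1" COMMENT "for counting"
)
AGGREGATE KEY(`user_id`, `date`)
DISTRIBUTED BY HASH(`user_id`) BUCKETS 10;

-- Equivalent in result to SELECT COUNT(*) FROM user_cost; but far cheaper,
-- as long as rows with the same AGGREGATE KEY are not imported repeatedly.
SELECT SUM(`count`) FROM user_cost;
```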
 - -## Suggestions for Choosing Data Model - -Because the data model is established when the table is built and **cannot be modified afterwards**, it is **very important** to select an appropriate data model. - -1. The Aggregate model can greatly reduce the amount of data scanned and the amount of query computation through pre-aggregation. It is very suitable for report query scenarios with fixed patterns. But this model is not very friendly to count(*) queries. At the same time, because the aggregation method on the Value column is fixed, semantic correctness should be considered for other types of aggregation queries. -2. The Unique model guarantees primary key uniqueness for scenarios that require a unique primary key constraint. However, the query advantage brought by pre-aggregation such as ROLLUP cannot be exploited (because the essence is REPLACE, and there is no aggregation such as SUM). -3. The Duplicate model is suitable for ad-hoc queries on any dimension. Although it also cannot take advantage of pre-aggregation, it is not constrained by the aggregation model and can take advantage of the column-store format (reading only the relevant columns rather than all Key columns). diff --git a/docs/en/getting-started/data-partition.md b/docs/en/getting-started/data-partition.md deleted file mode 100644 index 55c3fe2b6c..0000000000 --- a/docs/en/getting-started/data-partition.md +++ /dev/null @@ -1,398 +0,0 @@ ---- -{ - "title": "Data Partition", - "language": "en" -} ---- - - - -# Data Partition - -This document mainly introduces Doris's table creation and data partitioning, as well as problems that may be encountered during table creation and their solutions. - -## Basic Concepts - -In Doris, data is logically described in the form of a table. - -### Row & Column - -A table includes rows (Row) and columns (Column). A row is a single line of user data. A column is used to describe different fields in a row of data. - -Columns can be divided into two broad categories: Key and Value. From a business perspective, Key and Value can correspond to dimension columns and metric columns, respectively. From the perspective of the aggregation model, rows with the same Key columns will be aggregated into one row. The way the Value columns are aggregated is specified by the user when the table is built. For more introduction to the aggregation model, see the [Doris Data Model](./data-model-rollup.md). - -### Tablet & Partition - -In Doris's storage engine, user data is horizontally divided into several data slices (Tablet, also known as data buckets). Each tablet contains several rows of data. The data in different tablets does not intersect and is physically stored independently. - -Multiple tablets logically belong to different partitions (Partition). A Tablet belongs to only one Partition, and a Partition contains several Tablets. Because the tablets are physically stored independently, the Partitions can be considered physically independent as well. A Tablet is the smallest physical storage unit for operations such as data movement and replication. - -Several Partitions form a Table. A Partition can be thought of as the smallest logical management unit. Data import and deletion can be performed on a single Partition. - -## Data division - -We use a table-building operation to illustrate Doris's data partitioning. - -Doris's table creation is a synchronous command. If the command returns successfully, the table has been built successfully. - -See more help with `HELP CREATE TABLE;`.
- -This section introduces Doris's approach to building tables with an example. - -``` --- Range Partition - -CREATE TABLE IF NOT EXISTS example_db.expamle_range_tbl -( - `user_id` LARGEINT NOT NULL COMMENT "User id", - `date` DATE NOT NULL COMMENT "Data fill in date time", - `timestamp` DATETIME NOT NULL COMMENT "Timestamp of data being poured", - `city` VARCHAR(20) COMMENT "The city where the user is located", - `age` SMALLINT COMMENT "User age", - `sex` TINYINT COMMENT "User gender", - `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "User last visit time", - `cost` BIGINT SUM DEFAULT "0" COMMENT "Total user consumption", - `max_dwell_time` INT MAX DEFAULT "0" COMMENT "User maximum dwell time", - `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "User minimum dwell time" -) -ENGINE=olap -AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`) -PARTITION BY RANGE(`date`) -( - PARTITION `p201701` VALUES LESS THAN ("2017-02-01"), - PARTITION `p201702` VALUES LESS THAN ("2017-03-01"), - PARTITION `p201703` VALUES LESS THAN ("2017-04-01") -) -DISTRIBUTED BY HASH(`user_id`) BUCKETS 16 -PROPERTIES -( - "replication_num" = "3", - "storage_medium" = "SSD", - "storage_cooldown_time" = "2018-01-01 12:00:00" -); - - --- List Partition - -CREATE TABLE IF NOT EXISTS example_db.expamle_list_tbl -( - `user_id` LARGEINT NOT NULL COMMENT "User id", - `date` DATE NOT NULL COMMENT "Data fill in date time", - `timestamp` DATETIME NOT NULL COMMENT "Timestamp of data being poured", - `city` VARCHAR(20) COMMENT "The city where the user is located", - `age` SMALLINT COMMENT "User Age", - `sex` TINYINT COMMENT "User gender", - `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "User last visit time", - `cost` BIGINT SUM DEFAULT "0" COMMENT "Total user consumption", - `max_dwell_time` INT MAX DEFAULT "0" COMMENT "User maximum dwell time", - `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "User minimum dwell time" -) -ENGINE=olap -AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`) -PARTITION BY LIST(`city`) -( - PARTITION `p_cn` VALUES IN ("Beijing", "Shanghai", "Hong Kong"), - PARTITION `p_usa` VALUES IN ("New York", "San Francisco"), - PARTITION `p_jp` VALUES IN ("Tokyo") -) -DISTRIBUTED BY HASH(`user_id`) BUCKETS 16 -PROPERTIES -( - "replication_num" = "3", - "storage_medium" = "SSD", - "storage_cooldown_time" = "2018-01-01 12:00:00" -); - -``` - -### Column Definition - -Here we only use the AGGREGATE KEY data model as an example. See the [Doris Data Model](./data-model-rollup.md) for more data models. - -The basic type of column can be viewed by executing `HELP CREATE TABLE;` in mysql-client. - -In the AGGREGATE KEY data model, all columns that do not specify an aggregation mode (SUM, REPLACE, MAX, MIN) are treated as Key columns. The rest is the Value column. - -When defining columns, you can refer to the following suggestions: - -1. The Key column must precede all Value columns. -2. Try to choose the type of integer. Because integer type calculations and lookups are much more efficient than strings. -3. For the selection principle of integer types of different lengths, follow **enough to**. -4. For lengths of type VARCHAR and STRING, follow **is sufficient**. -5. The total byte length of all columns (including Key and Value) cannot exceed 100KB. - -### Partitioning and binning - -Doris supports two levels of data partitioning. The first layer is Partition, which supports Range and List partitioning. 
The second layer is the Bucket (Tablet), which only supports Hash partitioning. - -It is also possible to use only one layer of partitioning. When using a layer partition, only Bucket partitioning is supported. - -1. Partition - - * The Partition column can specify one or more columns. The partition class must be a KEY column. The use of multi-column partitions is described later in the **Multi-column partitioning** summary.  - * Regardless of the type of partition column, double quotes are required when writing partition values. - * There is no theoretical limit on the number of partitions. - * When you do not use Partition to build a table, the system will automatically generate a Partition with the same name as the table name. This Partition is not visible to the user and cannot be modified. - - #### Range Partition - - * Partition columns are usually time columns for easy management of old and new data. - * Partition supports only the upper bound by `VALUES LESS THAN (...)`, the system will use the upper bound of the previous partition as the lower bound of the partition, and generate a left closed right open interval. Passing, also supports specifying the upper and lower bounds by `VALUES [...)`, and generating a left closed right open interval. - * It is easier to understand by specifying `VALUES [...)`. Here is an example of the change in partition range when adding or deleting partitions using the `VALUES LESS THAN (...)` statement: - * As in the `example_range_tbl` example above, when the table is built, the following 3 partitions are automatically generated: - ``` - P201701: [MIN_VALUE, 2017-02-01) - P201702: [2017-02-01, 2017-03-01) - P201703: [2017-03-01, 2017-04-01) - ``` - * When we add a partition p201705 VALUES LESS THAN ("2017-06-01"), the partition results are as follows: - - ``` - P201701: [MIN_VALUE, 2017-02-01) - P201702: [2017-02-01, 2017-03-01) - P201703: [2017-03-01, 2017-04-01) - P201705: [2017-04-01, 2017-06-01) - ``` - - * At this point we delete the partition p201703, the partition results are as follows: - - ``` - p201701: [MIN_VALUE, 2017-02-01) - p201702: [2017-02-01, 2017-03-01) - p201705: [2017-04-01, 2017-06-01) - ``` - - > Note that the partition range of p201702 and p201705 has not changed, and there is a hole between the two partitions: [2017-03-01, 2017-04-01). That is, if the imported data range is within this hole, it cannot be imported. - - * Continue to delete partition p201702, the partition results are as follows: - - ``` - p201701: [MIN_VALUE, 2017-02-01) - p201705: [2017-04-01, 2017-06-01) - The void range becomes: [2017-02-01, 2017-04-01) - ``` - - * Now add a partition p201702new VALUES LESS THAN ("2017-03-01"), the partition results are as follows: - - ``` - p201701: [MIN_VALUE, 2017-02-01) - p201702new: [2017-02-01, 2017-03-01) - p201705: [2017-04-01, 2017-06-01) - ``` - - > You can see that the hole size is reduced to: [2017-03-01, 2017-04-01) - - * Now delete partition p201701 and add partition p201612 VALUES LESS THAN ("2017-01-01"), the partition result is as follows: - - ``` - p201612: [MIN_VALUE, 2017-01-01) - p201702new: [2017-02-01, 2017-03-01) - p201705: [2017-04-01, 2017-06-01) - ``` - - > A new void appeared: [2017-01-01, 2017-02-01) - - In summary, the deletion of a partition does not change the scope of an existing partition. There may be holes in deleting partitions. When a partition is added by the `VALUES LESS THAN` statement, the lower bound of the partition immediately follows the upper bound of the previous partition. 
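-
-    For example (a hedged sketch; the partition names and bounds below are illustrative and assume the `expamle_range_tbl` table created above), a new partition can be added either with an upper bound only, or with an explicit fixed range, which is handy for filling a hole left by a dropped partition:
-
-    ```
-    -- Upper bound only: the lower bound follows the upper bound of the previous partition.
-    ALTER TABLE example_db.expamle_range_tbl
-    ADD PARTITION p201706 VALUES LESS THAN ("2017-07-01");
-
-    -- Explicit left-closed, right-open range [lower, upper).
-    ALTER TABLE example_db.expamle_range_tbl
-    ADD PARTITION p201703_new VALUES [("2017-03-01"), ("2017-04-01"));
-    ```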
- - You cannot add partitions with overlapping ranges. - - #### List Partition - - * The partition column supports the `BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME, CHAR, VARCHAR` data type, and the partition value is an enumeration value. Partitions can be hit only if the data is one of the target partition enumeration values. - * Partition supports specifying the number of partitions contained in each partition via `VALUES IN (...) ` to specify the enumeration values contained in each partition. - * The following example illustrates how partitions change when adding or deleting partitions. - - * As in the `example_list_tbl` example above, when the table is built, the following three partitions are automatically created. - - ``` - p_cn: ("Beijing", "Shanghai", "Hong Kong") - p_usa: ("New York", "San Francisco") - p_jp: ("Tokyo") - ``` - - * When we add a partition p_uk VALUES IN ("London"), the result of the partition is as follows - - ``` - p_cn: ("Beijing", "Shanghai", "Hong Kong") - p_usa: ("New York", "San Francisco") - p_jp: ("Tokyo") - p_uk: ("London") - ``` - - * When we delete the partition p_jp, the result of the partition is as follows. - - ``` - p_cn: ("Beijing", "Shanghai", "Hong Kong") - p_usa: ("New York", "San Francisco") - p_uk: ("London") - ``` - - You cannot add partitions with overlapping ranges. - -2. Bucket - - * If a Partition is used, the `DISTRIBUTED ...` statement describes the division rules for the data in each partition. If you do not use Partition, it describes the rules for dividing the data of the entire table. - * The bucket column can be multiple columns, but it must be a Key column. The bucket column can be the same or different from the Partition column. - * The choice of bucket column is a trade-off between **query throughput** and **query concurrency**: - - 1. If you select multiple bucket columns, the data is more evenly distributed. However, if the query condition does not include the equivalent condition for all bucket columns, a query will scan all buckets. The throughput of such queries will increase, and the latency of a single query will decrease. This method is suitable for large throughput and low concurrent query scenarios. - 2. If you select only one or a few bucket columns, the point query can query only one bucket. This approach is suitable for high-concurrency point query scenarios. - - * There is no theoretical limit on the number of buckets. - -3. Recommendations on the number and amount of data for Partitions and Buckets. - - * The total number of tablets in a table is equal to (Partition num * Bucket num). - * The number of tablets in a table, which is slightly more than the number of disks in the entire cluster, regardless of capacity expansion. - * The data volume of a single tablet does not theoretically have an upper and lower bound, but is recommended to be in the range of 1G - 10G. If the amount of data for a single tablet is too small, the aggregation of the data is not good and the metadata management pressure is high. If the amount of data is too large, it is not conducive to the migration, completion, and increase the cost of Schema Change or Rollup operation failure retry (the granularity of these operations failure retry is Tablet). - * When the tablet's data volume principle and quantity principle conflict, it is recommended to prioritize the data volume principle. - * When building a table, the number of Buckets for each partition is uniformly specified. 
However, when dynamically increasing partitions (`ADD PARTITION`), you can specify the number of Buckets for the new partition separately. This feature can be used to easily reduce or expand data. - * Once the number of Buckets for a Partition is specified, it cannot be changed. Therefore, when determining the number of Buckets, you need to consider the expansion of the cluster in advance. For example, there are currently only 3 hosts, and each host has 1 disk. If the number of Buckets is only set to 3 or less, then even if you add more machines later, you can't increase the concurrency. - * Give some examples: Suppose there are 10 BEs, one for each BE disk. If the total size of a table is 500MB, you can consider 4-8 shards. 5GB: 8-16. 50GB: 32. 500GB: Recommended partitions, each partition is about 50GB in size, with 16-32 shards per partition. 5TB: Recommended partitions, each with a size of around 50GB and 16-32 shards per partition. - - > Note: The amount of data in the table can be viewed by the `show data` command. The result is divided by the number of copies, which is the amount of data in the table. - -#### Multi-column partition - -Doris supports specifying multiple columns as partition columns, examples are as follows: - -##### Range Partition - -``` - PARTITION BY RANGE(`date`, `id`) - ( - PARTITION `p201701_1000` VALUES LESS THAN ("2017-02-01", "1000"), - PARTITION `p201702_2000` VALUES LESS THAN ("2017-03-01", "2000"), - PARTITION `p201703_all` VALUES LESS THAN ("2017-04-01") - ) -``` - - In the above example, we specify `date`(DATE type) and `id`(INT type) as partition columns. The resulting partitions in the above example are as follows: - -``` -p201701_1000: [(MIN_VALUE, MIN_VALUE), ("2017-02-01", "1000") ) -p201702_2000: [("2017-02-01", "1000"), ("2017-03-01", "2000") ) -p201703_all: [("2017-03-01", "2000"), ("2017-04-01", MIN_VALUE)) -``` - -Note that the last partition user defaults only the partition value of the `date` column, so the partition value of the `id` column will be filled with `MIN_VALUE` by default. When the user inserts data, the partition column values ​​are compared in order, and the corresponding partition is finally obtained. Examples are as follows: - -``` - Data --> Partition - 2017-01-01, 200 --> p201701_1000 - 2017-01-01, 2000 --> p201701_1000 - 2017-02-01, 100 --> p201701_1000 - 2017-02-01, 2000 --> p201702_2000 - 2017-02-15, 5000 --> p201702_2000 - 2017-03-01, 2000 --> p201703_all - 2017-03-10, 1 --> p201703_all - 2017-04-01, 1000 --> Unable to import - 2017-05-01, 1000 --> Unable to import -``` - -##### List Partition - -``` - PARTITION BY LIST(`id`, `city`) - ( - PARTITION `p1_city` VALUES IN (("1", "Beijing"), ("1", "Shanghai")), - PARTITION `p2_city` VALUES IN (("2", "Beijing"), ("2", "Shanghai")), - PARTITION `p3_city` VALUES IN (("3", "Beijing"), ("3", "Shanghai")) - ) -``` - -In the above example, we specify `id`(INT type) and `city`(VARCHAR type) as partition columns. The above example ends up with the following partitions. - -``` - p1_city: [("1", "Beijing"), ("1", "Shanghai")] - p2_city: [("2", "Beijing"), ("2", "Shanghai")] - p3_city: [("3", "Beijing"), ("3", "Shanghai")] -``` - -When the user inserts data, the partition column values will be compared sequentially in order to finally get the corresponding partition. An example is as follows. 
- -``` -Data ---> Partition -1, Beijing ---> p1_city -1, Shanghai ---> p1_city -2, Shanghai ---> p2_city -3, Beijing ---> p3_city -1, Tianjin ---> Unable to import -4, Beijing ---> Unable to import -``` - -### PROPERTIES - -In the last PROPERTIES of the table statement, you can specify the following two parameters: - -Replication_num - - * The number of copies per tablet. The default is 3, it is recommended to keep the default. In the build statement, the number of Tablet copies in all Partitions is uniformly specified. When you add a new partition, you can individually specify the number of copies of the tablet in the new partition. - * The number of copies can be modified at runtime. It is strongly recommended to keep odd numbers. - * The maximum number of copies depends on the number of independent IPs in the cluster (note that it is not the number of BEs). The principle of replica distribution in Doris is that the copies of the same Tablet are not allowed to be distributed on the same physical machine, and the physical machine is identified as IP. Therefore, even if 3 or more BE instances are deployed on the same physical machine, if the BEs have the same IP, you can only set the number of copies to 1. - * For some small, and infrequently updated dimension tables, consider setting more copies. In this way, when joining queries, there is a greater probability of local data join. - -2. storage_medium & storage\_cooldown\_time - - * The BE data storage directory can be explicitly specified as SSD or HDD (differentiated by .SSD or .HDD suffix). When you build a table, you can uniformly specify the media for all Partition initial storage. Note that the suffix is ​​to explicitly specify the disk media without checking to see if it matches the actual media type. - * The default initial storage media can be specified by `default_storage_medium= XXX` in the fe configuration file `fe.conf`, or, if not, by default, HDD. If specified as an SSD, the data is initially stored on the SSD. - * If storage\_cooldown\_time is not specified, the data is automatically migrated from the SSD to the HDD after 30 days by default. If storage\_cooldown\_time is specified, the data will not migrate until the storage_cooldown_time time is reached. - * Note that when storage_medium is specified, if FE parameter 'enable_strict_storage_medium_check' is' False 'this parameter is simply a' do your best 'setting. Even if SSD storage media is not set up within the cluster, no errors are reported, and it is automatically stored in the available data directory. - Similarly, if the SSD media is not accessible and space is insufficient, it is possible to initially store data directly on other available media. When the data is due to be migrated to an HDD, the migration may also fail (but will try again and again) if the HDD medium is not accessible and space is insufficient. - If FE parameter 'enable_strict_storage_medium_check' is' True ', then 'Failed to find enough host in all Backends with storage medium is SSD' will be reported when SSD storage medium is not set in the cluster. - -### ENGINE - -In this example, the type of ENGINE is olap, the default ENGINE type. In Doris, only this ENGINE type is managed and stored by Doris. Other ENGINE types, such as mysql, broker, es, etc., are essentially mappings to tables in other external databases or systems to ensure that Doris can read the data. And Doris itself does not create, manage, and store any tables and data of a non-olap ENGINE type. 
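-
-For reference, the sketch below shows what such an external mapping can look like (hedged: the connection properties and names are placeholders, and MySQL external tables are only available when Doris is compiled with the optional MySQL support, or via the ODBC external table mechanism instead):
-
-```
-CREATE EXTERNAL TABLE example_db.ext_mysql_tbl
-(
-    `k1` INT,
-    `k2` VARCHAR(64)
-)
-ENGINE = mysql
-PROPERTIES
-(
-    "host" = "127.0.0.1",
-    "port" = "3306",
-    "user" = "mysql_user",
-    "password" = "mysql_passwd",
-    "database" = "mysql_db",
-    "table" = "mysql_tbl"
-);
-```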
- -### Other - -`IF NOT EXISTS` indicates that if the table has not been created, it is created. Note that only the table name is judged here, and it is not determined whether the new table structure is the same as the existing table structure. So if there is a table with the same name but different structure, the command will also return success, but it does not mean that a new table and a new structure have been created. - -## common problem - -### Build Table Operations FAQ - -1. If a syntax error occurs in a long build statement, a syntax error may be incomplete. Here is a list of possible syntax errors for manual error correction: - - * The syntax is incorrect. Please read `HELP CREATE TABLE;` carefully to check the relevant syntax structure. - * Reserved words. When the user-defined name encounters a reserved word, it needs to be enclosed in the backquote ``. It is recommended that all custom names be generated using this symbol. - * Chinese characters or full-width characters. Non-utf8 encoded Chinese characters, or hidden full-width characters (spaces, punctuation, etc.) can cause syntax errors. It is recommended to check with a text editor with invisible characters. - -2. `Failed to create partition [xxx] . Timeout` - - Doris builds are created in order of Partition granularity. This error may be reported when a Partition creation fails. Even if you don't use Partition, you will report `Failed to create partition` when there is a problem with the built table, because as mentioned earlier, Doris will create an unchangeable default Partition for tables that do not have a Partition specified. - - When this error is encountered, it is usually the BE that has encountered problems creating data fragments. You can follow the steps below to troubleshoot: - - 1. In fe.log, find the `Failed to create partition` log for the corresponding point in time. In this log, a series of numbers like `{10001-10010}` will appear. The first number of the pair is the Backend ID and the second number is the Tablet ID. As for the pair of numbers above, on the Backend with ID 10001, creating a tablet with ID 10010 failed. - 2. Go to the be.INFO log corresponding to Backend and find the log related to the tablet id in the corresponding time period. You can find the error message. - 3. Listed below are some common tablet creation failure errors, including but not limited to: - * BE did not receive the relevant task, and the tablet id related log could not be found in be.INFO. Or the BE is created successfully, but the report fails. For the above questions, see [Deployment and Upgrade Documentation] to check the connectivity of FE and BE. - * Pre-allocated memory failed. It may be that the length of a line in a row in the table exceeds 100KB. - * `Too many open files`. The number of open file handles exceeds the Linux system limit. The handle limit of the Linux system needs to be modified. - - You can also extend the timeout by setting `tablet_create_timeout_second=xxx` in fe.conf. The default is 2 seconds. - -3. The build table command does not return results for a long time. - - Doris's table creation command is a synchronous command. The timeout of this command is currently set to be relatively simple, ie (tablet num * replication num) seconds. If you create more data fragments and have fragment creation failed, it may cause an error to be returned after waiting for a long timeout. - - Under normal circumstances, the statement will return in a few seconds or ten seconds. 
If it takes more than one minute, it is recommended to cancel the operation directly and check the FE or BE logs for related errors.
diff --git a/docs/en/getting-started/hit-the-rollup.md b/docs/en/getting-started/hit-the-rollup.md
deleted file mode 100644
index 4cc8d7ed42..0000000000
--- a/docs/en/getting-started/hit-the-rollup.md
+++ /dev/null
@@ -1,298 +0,0 @@
----
-{
-    "title": "Rollup and query",
-    "language": "en"
-}
----
-
-
-
-# Rollup and query
-
-As an aggregated (materialized) view in Doris, a Rollup can play two roles in queries:
-
-* Index
-* Aggregate data (only for aggregate models, i.e. aggregate key)
-
-However, certain conditions need to be met in order to hit a Rollup. The PreAggregation value of the ScanNode in the execution plan indicates whether pre-aggregation on a Rollup can be used, and the rollup field indicates which Rollup table is hit.
-
-## Terminology
-
-Base: the base table.
-
-Rollup: generally refers to the Rollup tables created based on a Base table, but in some scenarios it includes both Base and Rollup tables.
-
-## Index
-
-Doris's prefix index has been introduced in the earlier query practice documentation: in the underlying storage engine, Doris builds a separate sorted sparse index on the first 36 bytes of the sort key of each Base/Rollup table (for VARCHAR columns the prefix index may be shorter than 36 bytes, because a VARCHAR column truncates the prefix index and uses at most 20 of its bytes). Since the data itself is also sorted, a query first locates rows through this index and then binary-searches within the data. Doris matches the conditions in the query against the prefix index of each Base/Rollup and selects the Base/Rollup that matches the longest prefix.
-
-```
- ---> matching from left to right
-+----+----+----+----+----+----+
-| c1 | c2 | c3 | c4 | c5 |... |
-```
-
-As shown in the figure above, the WHERE and ON conditions in the query are pushed down to the ScanNode and matched against the prefix index starting from its first column. Doris checks whether each column appears in the conditions and accumulates the matching length until a column cannot be matched or the 36-byte limit is reached (a VARCHAR column can only match 20 bytes and truncates the prefix index when fewer than 36 bytes are matched). Doris then chooses the Base/Rollup with the longest matching length.
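-
-As background, a rollup is typically added to an existing table with `ALTER TABLE ... ADD ROLLUP`, and the base and rollup schemas can then be listed together with `DESC ... ALL`. A hedged sketch (the column order here matches `rollup_index1` in the example below):
-
-```
-ALTER TABLE test ADD ROLLUP rollup_index1(k9, k1, k2, k3, k4, k5, k6, k7, k8, k10, k11);
-DESC test ALL;
-```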
The following example shows how to create a Base table and four rollups: - -``` -+---------------+-------+--------------+------+-------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+---------------+-------+--------------+------+-------+---------+-------+ -| test | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup_index1 | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup_index2 | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k1 | TINYINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup_index3 | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup_index4 | k4 | BIGINT | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -+---------------+-------+--------------+------+-------+---------+-------+ -``` - -The prefix indexes of the five tables are - -``` -Base(k1 ,k2, k3, k4, k5, k6, k7) - -rollup_index1(k9) - -rollup_index2(k9) - -rollup_index3(k4, k5, k6, k1, k2, k3, k7) - -rollup_index4(k4, k6, k5, k1, k2, k3, k7) -``` - -Conditions on columns that can be indexed with the prefix need to be `=` `<` `>` `<=` `>=` `in` `between`, and these conditions are side-by-side and the relationship uses `and` connections', which cannot be hit for `or`、`!=` and so on. 
Then look at the following query: - -``` -SELECT * FROM test WHERE k1 = 1 AND k2 > 3; -``` - -With the conditions on K1 and k2, check that only the first column of Base contains K1 in the condition, so match the longest prefix index, test, explain: - -``` -| 0:OlapScanNode -| TABLE: test -| PREAGGREGATION: OFF. Reason: No AggregateInfo -| PREDICATES: `k1` = 1, `k2` > 3 -| partitions=1/1 -| rollup: test -| buckets=1/10 -| cardinality=-1 -| avgRowSize=0.0 -| numNodes=0 -| tuple ids: 0 -``` - -Look again at the following queries: - -`SELECT * FROM test WHERE k4 =1 AND k5 > 3;` - -With K4 and K5 conditions, check that the first column of rollup_index3 and rollup_index4 contains k4, but the second column of rollup_index3 contains k5, so the matching prefix index is the longest. - -``` -| 0:OlapScanNode -| TABLE: test -| PREAGGREGATION: OFF. Reason: No AggregateInfo -| PREDICATES: `k4` = 1, `k5` > 3 -| partitions=1/1 -| rollup: rollup_index3 -| buckets=10/10 -| cardinality=-1 -| avgRowSize=0.0 -| numNodes=0 -| tuple ids: 0 -``` - -Now we try to match the conditions on the column containing varchar, as follows: - -`SELECT * FROM test WHERE k9 IN ("xxx", "yyyy") AND k1 = 10;` - -There are K9 and K1 conditions. The first column of rollup_index1 and rollup_index2 contains k9. It is reasonable to choose either rollup here to hit the prefix index and randomly select the same one (because there are just 20 bytes in varchar, and the prefix index is truncated in less than 36 bytes). The current strategy here will continue to match k1, because the second rollup_index1 is listed as k1, so rollup_index1 is chosen, in fact, the latter K1 condition will not play an accelerating role. (If the condition outside the prefix index needs to accelerate the query, it can be accelerated by establishing a Bloom Filter filter. Typically for string types, because Doris has a Block level for columns, a Min/Max index for shaping and dates.) The following is the result of explain. - -``` -| 0:OlapScanNode -| TABLE: test -| PREAGGREGATION: OFF. Reason: No AggregateInfo -| PREDICATES: `k9` IN ('xxx', 'yyyy'), `k1` = 10 -| partitions=1/1 -| rollup: rollup_index1 -| buckets=1/10 -| cardinality=-1 -| avgRowSize=0.0 -| numNodes=0 -| tuple ids: 0 -``` - -Finally, look at a query that can be hit by more than one Rollup: - -`Select * from test where K4 < 1000 and K5 = 80 and K6 = 10000;` - -There are three conditions: k4, K5 and k6. The first three columns of rollup_index3 and rollup_index4 contain these three columns respectively. So the prefix index length matched by them is the same. Both can be selected. The current default strategy is to select a rollup created earlier. Here is rollup_index3. - -``` -| 0:OlapScanNode -| TABLE: test -| PREAGGREGATION: OFF. Reason: No AggregateInfo -| PREDICATES: `k4` < 1000, `k5` = 80, `k6` >= 10000.0 -| partitions=1/1 -| rollup: rollup_index3 -| buckets=10/10 -| cardinality=-1 -| avgRowSize=0.0 -| numNodes=0 -| tuple ids: 0 -``` - -If you modify the above query slightly as follows: - -`SELECT * FROM test WHERE k4 < 1000 AND k5 = 80 OR k6 >= 10000;` - -The query here cannot hit the prefix index. (Even any Min/Max in the Doris storage engine, the BloomFilter index doesn't work.) - -## Aggregate data - -Of course, the function of aggregated data is indispensable for general polymer views. Such materialized views are very helpful for aggregated queries or report queries. To hit the polymer views, the following prerequisites are needed: - -1. 
There is a separate Rollup for all columns involved in a query or subquery. -2. If there is Join in a query or sub-query, the type of Join needs to be Inner join. - -The following are some types of aggregated queries that can hit Rollup. - -| Column type Query type | Sum | Distinct/Count Distinct | Min | Max | APPROX_COUNT_DISTINCT | -|--------------|-------|-------------------------|-------|-------|-------| -| Key | false | true | true | true | true | -| Value(Sum) | true | false | false | false | false | -|Value(Replace)| false | false | false | false | false | -| Value(Min) | false | false | true | false | false | -| Value(Max) | false | false | false | true | false | - - -If the above conditions are met, there will be two stages in judging the hit of Rollup for the aggregation model: - -1. Firstly, the Rollup table with the longest index hit by prefix index is matched by conditions. See the index strategy above. -2. Then compare the rows of Rollup and select the smallest Rollup. - -The following Base table and Rollup: - -``` -+-------------+-------+--------------+------+-------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+-------------+-------+--------------+------+-------+---------+-------+ -| test_rollup | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup2 | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup1 | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -+-------------+-------+--------------+------+-------+---------+-------+ -``` - - -See the following queries: - -`SELECT SUM(k11) FROM test_rollup WHERE k1 = 10 AND k2 > 200 AND k3 in (1,2,3);` - -Firstly, it judges whether the query can hit the aggregated Rollup table. After checking the graph above, it is possible. Then the condition contains three conditions: k1, K2 and k3. The first three columns of test_rollup, rollup1 and rollup2 contain all the three conditions. So the prefix index length is the same. Then, it is obvious that the aggregation degree of rollup2 is the highest when comparing the number of rows. Row 2 is selected because of the minimum number of rows. 
- -``` -| 0:OlapScanNode | -| TABLE: test_rollup | -| PREAGGREGATION: ON | -| PREDICATES: `k1` = 10, `k2` > 200, `k3` IN (1, 2, 3) | -| partitions=1/1 | -| rollup: rollup2 | -| buckets=1/10 | -| cardinality=-1 | -| avgRowSize=0.0 | -| numNodes=0 | -| tuple ids: 0 | -``` diff --git a/new-docs/en/install/install-deploy.md b/docs/en/install/install-deploy.md similarity index 100% rename from new-docs/en/install/install-deploy.md rename to docs/en/install/install-deploy.md diff --git a/new-docs/en/install/source-install/compilation-arm.md b/docs/en/install/source-install/compilation-arm.md similarity index 100% rename from new-docs/en/install/source-install/compilation-arm.md rename to docs/en/install/source-install/compilation-arm.md diff --git a/docs/en/installing/compilation-with-ldb-toolchain.md b/docs/en/install/source-install/compilation-with-ldb-toolchain.md similarity index 100% rename from docs/en/installing/compilation-with-ldb-toolchain.md rename to docs/en/install/source-install/compilation-with-ldb-toolchain.md diff --git a/new-docs/en/install/source-install/compilation.md b/docs/en/install/source-install/compilation.md similarity index 100% rename from new-docs/en/install/source-install/compilation.md rename to docs/en/install/source-install/compilation.md diff --git a/docs/en/installing/compilation-arm.md b/docs/en/installing/compilation-arm.md deleted file mode 100644 index 305ee35aac..0000000000 --- a/docs/en/installing/compilation-arm.md +++ /dev/null @@ -1,258 +0,0 @@ ---- -{ - "title": "Compile on ARM platform", - "language": "en" -} ---- - - - - -# Compile and Run Doris on ARM64 + KylinOS. - -This document describes how to compile Doris on the ARM64 platform. - -Note that this document is only a guide document. Other errors may occur when compiling in different environments. - -## Software and hardware environment - -1. KylinOS version: - - ``` - $> cat /etc/.kyinfo - name=Kylin-Server - milestone=10-SP1-Release-Build04-20200711 - arch=arm64 - beta=False - time=2020-07-11 17:16:54 - dist_id=Kylin-Server-10-SP1-Release-Build04-20200711-arm64-2020-07-11 17:16:54 - ``` - -2. CPU model - - ``` - $> cat /proc/cpuinfo - model name: Phytium,FT-2000+/64 - ``` - -## Compile using ldb-toolchain - -This method works with Doris versions after [commit 7f3564](https://github.com/apache/incubator-doris/commit/7f3564cca62de49c9f2ea67fcf735921dbebb4d1) - -Download [ldbi\_toolchain\_gen.aarch64.sh](https://github.com/amosbird/ldb_toolchain_gen/releases/download/v0.9.1/ldb_toolchain_gen.aarch64.sh) - -For subsequent compilation, see [Compiling with LDB toolchain](./compilation-with-ldb-toolchain.md) - -Note that both jdk and nodejs need to be downloaded with the corresponding aarch64 versions: - -1. [Java8-aarch64](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/jdk-8u291-linux-aarch64.tar.gz) -2. [Node v12.13.0-aarch64](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/node-v16.3.0-linux-arm64.tar.xz) - -## ~~ Compile with GCC 10 (deprecated) ~~ - -This method only works with Doris source code before [commit 68bab73](https://github.com/apache/incubator-doris/commit/68bab73c359e40bf485a663e9a6e6ee76d81d382). - -### Compilation tool installation (no network) - -In the example, all tools are installed in the `/home/doris/tools/installed/` directory. - -Please obtain the required installation package first under network conditions. - -#### 1. 
Install gcc10 - -Download gcc-10.1.0 - -``` -wget https://mirrors.tuna.tsinghua.edu.cn/gnu/gcc/gcc-10.1.0/gcc-10.1.0.tar.gz -``` - -After unzipping, check the dependencies in `contrib/download_prerequisites` and download: - -``` -http://gcc.gnu.org/pub/gcc/infrastructure/gmp-6.1.0.tar.bz2 -http://gcc.gnu.org/pub/gcc/infrastructure/mpfr-3.1.4.tar.bz2 -http://gcc.gnu.org/pub/gcc/infrastructure/mpc-1.0.3.tar.gz -http://gcc.gnu.org/pub/gcc/infrastructure/isl-0.18.tar.bz2 -``` - -Unzip these four dependencies, then move to the gcc-10.1.0 source directory and rename them to gmp, isl, mpc, mpfr. - -Download and install automake-1.15 (because gcc10 will find automake 1.15 version during compilation) - -``` -https://ftp.gnu.org/gnu/automake/automake-1.15.tar.gz -tar xzf automake-1.15.tar.gz -./configure --prefix=/home/doris/tools/installed -make && make install -export PATH=/home/doris/tools/installed/bin:$PATH -``` - -Compile GCC10: - -``` -cd gcc-10.1.0 -./configure --prefix=/home/doris/tools/installed -make -j && make install -``` - -Compile time is longer. - -#### 2. Install other compilation components - -1. jdk-8u291-linux-aarch64.tar.gz - - `https://www.oracle.com/java/technologies/javase/javase-jdk8-downloads.html` - - No need to compile, just use it out of the box. - -2. cmake-3.19.8-Linux-aarch64.tar.gz - - `https://cmake.org/download/` - - No need to compile, just use it out of the box - -3. apache-maven-3.8.1-bin.tar.gz - - `https://maven.apache.org/download.cgi` - - No need to compile, just use it out of the box - -4. nodejs 16.3.0 - - `https://nodejs.org/dist/v16.3.0/node-v16.3.0-linux-arm64.tar.xz` - - No need to compile, just use it out of the box - -5. libtool-2.4.6.tar.gz - - For compiling third-party components, although the system may come with libtool, libtool needs to be together with automake, so it is not easy to cause problems. - - ``` - https://ftp.gnu.org/gnu/libtool/libtool-2.4.6.tar.gz - cd libtool-2.4.6/ - ./configure --prefix=/home/doris/tools/installed - make -j && make install - ``` - -6. binutils-2.36.tar.xz (obtain bdf.h) - - ``` - https://ftp.gnu.org/gnu/binutils/binutils-2.36.tar.bz2 - ./configure --prefix=/home/doris/tools/installed - make -j && make install - ``` - -7. Libiberty (for compiling BE) - - The source code of this library is under the source code package of gcc-10.1.0 - ``` - cd gcc-10.1.0/libiberty/ - ./configure --prefix=/home/doris/tools/installed - make - ``` - - After compilation, libiberty.a will be generated, which can be moved to the lib64 directory of Doris' thirdparty. - -#### 3. Compile third-party libraries - -Suppose Doris source code is under `/home/doris/doris-src/`. - -1. Manually download all third-party libraries and place them in the thirdparty/src directory. -2. Add `custom_env.sh` in the Doris source directory and add the following content - - ``` - export DORIS_THIRDPARTY=/home/doris/doris-src/thirdparty/ - export JAVA_HOME=/home/doris/tools/jdk1.8.0_291/ - export DORIS_GCC_HOME=/home/doris/tools/installed/ - export PATCH_COMPILER_RT=true - ``` - - Pay attention to replace the corresponding directory - -3. Modify part of the content in build-thirdparty.sh - - 1. Close `build_mysql` and `build_libhdfs3` - - mysql is no longer needed. However, libhdfs3 does not support arm architecture for the time being, so running Doris in arm does not support direct access to hdfs through libhdfs3, and requires a broker. - - 2. Add the configure parameter in `build_curl`: `--without-libpsl`. 
If it is not added, an error may be reported during the linking phase of the final compilation of Doris BE: `undefined reference to ‘psl_is_cookie_domain_acceptable'` - -4. Execute build-thirdparty.sh. Here are only possible errors - - * `error: narrowing conversion of'-1' from'int' to'char' [-Wnarrowing]` - - There will be an error when compiling brpc 0.9.7. The solution is to add `-Wno-narrowing` in `CMAKE_CXX_FLAGS` of CMakeLists.txt of brpc. This problem has been fixed in the brpc master code: - - `https://github.com/apache/incubator-brpc/issues/1091` - - * `libz.a(deflate.o): relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `z_errmsg' which may bind externally can not be used when making a shared object; recompile with -fPIC` - - There will be errors when compiling brpc 0.9.7, and libcrypto will also report similar errors. The reason is unknown. It seems that under aarch64, brpc needs to link the dynamic zlib and crypto libraries. But when we compile these two third-party libraries, we only compiled .a static files. Solution: Recompile zlib and openssl to generate .so dynamic library: - - Open `build-thirdparty.sh`, find the `build_zlib` function, and change: - - ``` - ./configure --prefix=$TP_INSTALL_DIR --static - Just change to - ./configure --prefix=$TP_INSTALL_DIR - ``` - - Find `build_openssl` and comment out the following parts: - - ``` - #if [-f $TP_INSTALL_DIR/lib64/libcrypto.so ]; then - # rm -rf $TP_INSTALL_DIR/lib64/libcrypto.so* - #fi - #if [-f $TP_INSTALL_DIR/lib64/libssl.so ]; then - # rm -rf $TP_INSTALL_DIR/lib64/libssl.so* - #fi - ``` - - Then go to `build-thirdparty.sh`, comment out other `build_xxx`, open only `build_zlib` and `build_openssl`, and `build_brpc` and later `build_xxx`. Then re-execute `build-thirdparty.sh`. - - * The compilation is stuck at a certain stage. - - Not sure why. Solution: Rerun `build-thirdparty.sh`. `build-thirdparty.sh` can be executed repeatedly. - -#### 4. Compile Doris source code - -First run the following command to check whether the compilation machine supports the avx2 instruction set - -``` -$ cat /proc/cpuinfo | grep avx2 -``` - -If it is not supported, use the following command to compile - -``` -$ USE_AVX2=0 sh build.sh -``` - -If supported, you can directly execute `sh build.sh` without adding USE_AVX2=0. - - -#### 5. FAQ - -1. `undefined reference to psl_free` appears when compiling Doris - - libcurl will call libpsl functions, but libpsl is not linked for an unknown reason. Solutions (choose one of the two): - - 1. Add `--without-libpsl` to the `build_curl` method in `thirdparty/build-thirdparty.sh`, recompile libcurl, and then recompile Doris. - 2. About line 603 in `be/CMakeLists.txt`, add `-lpsl` after `-pthread`, and then recompile Doris. diff --git a/docs/en/installing/compilation.md b/docs/en/installing/compilation.md deleted file mode 100644 index 3f25b0f8cb..0000000000 --- a/docs/en/installing/compilation.md +++ /dev/null @@ -1,263 +0,0 @@ ---- -{ - "title": "Compilation", - "language": "en" -} ---- - - - - -# Compilation - -This document focuses on how to code Doris through source code. - -## Developing mirror compilation using Docker (recommended) - -### Use off-the-shelf mirrors - -1. 
Download Docker Mirror - - `$ docker pull apache/incubator-doris:build-env-ldb-toolchain-latest` - - Check mirror download completed: - - ``` - $ docker images - REPOSITORY TAG IMAGE ID CREATED SIZE - apache/incubator-doris build-env-ldb-toolchain-latest 49f68cecbc1a 4 days ago 3.76GB - ``` - -> Note1: For different versions of Doris, you need to download the corresponding mirror version. From Apache Doris 0.15 version, the docker image will keep same version number with Doris. For example, you can use `apache/incubator-doris:build-env-for-0.15.0` to compile Apache Doris 0.15.0. -> -> Node2: `apache/incubator-doris:build-env-ldb-toolchain-latest` is for compiling trunk code, and will be updated along with trunk code. View the update time in `docker/README.md` - -| image version | commit id | release version | -|---|---|---| -| apache/incubator-doris:build-env | before [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) | 0.8.x, 0.9.x | -| apache/incubator-doris:build-env-1.1 | [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) or later | 0.10.x or later | -| apache/incubator-doris:build-env-1.2 | [4ef5a8c](https://github.com/apache/incubator-doris/commit/4ef5a8c8560351d7fff7ff8fd51c4c7a75e006a8) or later | 0.12.x - 0.14.0 | -| apache/incubator-doris:build-env-1.3.1 | [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f) or later | 0.14.x | -| apache/incubator-doris:build-env-for-0.15.0 | [a81f4da](https://github.com/apache/incubator-doris/commit/a81f4da4e461a54782a96433b746d07be89e6b54) or later | 0.15.0 | -| apache/incubator-doris:build-env-latest | before [0efef1b](https://github.com/apache/incubator-doris/commit/0efef1b332300887ee0473f9df9bdd9d7297d824) | | -| apache/incubator-doris:build-env-ldb-toolchain-latest | trunk | trunk | - -**note**: - -> 1. Dev docker image [ChangeLog](https://github.com/apache/incubator-doris/blob/master/thirdparty/CHANGELOG.md) - -> 2. Doris version 0.14.0 still uses apache/incubator-doris:build-env-1.2 to compile, and the 0.14.x code will use apache/incubator-doris:build-env-1.3.1. - -> 3. From docker image of build-env-1.3.1, both OpenJDK 8 and OpenJDK 11 are included, and OpenJDK 11 is used for compilation by default. Please make sure that the JDK version used for compiling is the same as the JDK version used at runtime, otherwise it may cause unexpected operation errors. You can use the following command to switch the default JDK version in container: -> -> Switch to JDK 8: -> -> ``` -> $ alternatives --set java java-1.8.0-openjdk.x86_64 -> $ alternatives --set javac java-1.8.0-openjdk.x86_64 -> $ export JAVA_HOME=/usr/lib/jvm/java-1.8.0 -> ``` -> -> Switch to JDK 11: -> -> ``` -> $ alternatives --set java java-11-openjdk.x86_64 -> $ alternatives --set javac java-11-openjdk.x86_64 -> $ export JAVA_HOME=/usr/lib/jvm/java-11 -> ``` - -2. Running Mirror - - `$ docker run -it apache/incubator-doris:build-env-ldb-toolchain-latest` - - It is recommended to run the container by mounting the local Doris source directory, so that the compiled binary file will be stored in the host machine and will not disappear because the container exits. - - At the same time, it is recommended to mount the maven `.m2` directory in the mirror to the host directory at the same time to prevent repeated downloading of maven's dependent libraries each time the compilation is started. 
- - ``` - $ docker run -it -v /your/local/.m2:/root/.m2 -v /your/local/incubator-doris-DORIS-x.x.x-release/:/root/incubator-doris-DORIS-x.x.x-release/ apache/incubator-doris:build-env-ldb-toolchain-latest - ``` - -3. Download source code - - After starting the mirror, you should be in the container. The Doris source code can be downloaded from the following command (local source directory mounted is not required): - - ``` - $ wget https://dist.apache.org/repos/dist/dev/incubator/doris/xxx.tar.gz - or - $ git clone https://github.com/apache/incubator-doris.git - ``` - -4. Compile Doris - - First run the following command to check whether the compilation machine supports the avx2 instruction set - - ``` - $ cat /proc/cpuinfo | grep avx2 - ``` - - If it is not supported, use the following command to compile - - ``` - $ USE_AVX2=0 sh build.sh - ``` - - If supported, compile directly without adding USE_AVX2=0 - - ``` - $ sh build.sh - ``` - - > **Note:** - > - > If you are using `build-env-for-0.15.0` or later version for the first time, use the following command when compiling: - > - > `sh build.sh --clean --be --fe --ui` - > - > This is because from build-env-for-0.15.0, we upgraded thrift (0.9 -> 0.13), you need to use the --clean command to force the use of the new version of thrift to generate code files, otherwise incompatible code will appear. - - After compilation, the output file is in the `output/` directory. - -### Self-compiling Development Environment Mirror - -You can also create a Doris development environment mirror yourself, referring specifically to the `docker/README.md` file. - - -## Direct Compilation (CentOS/Ubuntu) - -You can try to compile Doris directly in your own Linux environment. - -1. System Dependence - * Before commit [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f) will use the dependencies as follows: - - `GCC 7.3+, Oracle JDK 1.8+, Python 2.7+, Apache Maven 3.5+, CMake 3.11+ Bison 3.0+` - - If you are using Ubuntu 16.04 or newer, you can use the following command to install the dependencies - - `sudo apt-get install build-essential openjdk-8-jdk maven cmake byacc flex automake libtool-bin bison binutils-dev libiberty-dev zip unzip libncurses5-dev curl git ninja-build python autopoint pkg-config` - - If you are using CentOS you can use the following command to install the dependencies - - `sudo yum groupinstall 'Development Tools' && sudo yum install maven cmake byacc flex automake libtool bison binutils-devel zip unzip ncurses-devel curl git wget python2 glibc-static libstdc++-static java-1.8.0-openjdk` - - * After commit [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f) will use the dependencies as follows: - - `GCC 10+, Oracle JDK 1.8+, Python 2.7+, Apache Maven 3.5+, CMake 3.19.2+ Bison 3.0+` - - If you are using Ubuntu 16.04 or newer, you can use the following command to install the dependencies - - ``` - sudo apt install build-essential openjdk-8-jdk maven cmake byacc flex automake libtool-bin bison binutils-dev libiberty-dev zip unzip libncurses5-dev curl git ninja-build python - sudo add-apt-repository ppa:ubuntu-toolchain-r/ppa - sudo apt update - sudo apt install gcc-10 g++-10 - sudo apt-get install autoconf automake libtool autopoint - ``` - If you are using CentOS you can use the following command to install the dependencies - - ``` - sudo yum groupinstall 'Development Tools' && sudo yum install maven cmake byacc flex automake libtool bison 
binutils-devel zip unzip ncurses-devel curl git wget python2 glibc-static libstdc++-static java-1.8.0-openjdk - sudo yum install centos-release-scl - sudo yum install devtoolset-10 - scl enable devtoolset-10 bash - ``` - If devtoolset-10 is not found in current repo. Oracle has already rebuilt the devtoolset-10 packages. You can use this repo file: - ``` - [ol7_software_collections] - name=Software Collection packages for Oracle Linux 7 ($basearch) - baseurl=http://yum.oracle.com/repo/OracleLinux/OL7/SoftwareCollections/$basearch/ - gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-oracle - gpgcheck=1 - enabled=1 - ``` - After installation, set environment variables `PATH`, `JAVA_HOME`, etc. - > nit: you can find the jdk install directory by using command `alternatives --list` - - Doris 0.14.0 will use gcc7 env to compile. - -2. Compile Doris - - As with Docker development image compilation, check whether the avx2 instruction is supported before compiling - - ``` - $ cat /proc/cpuinfo | grep avx2 - ``` - - If supported, use the following command to compile - - ``` - $ sh build.sh - ``` - - If it is not supported, you need to add USE_AVX2=0 - - ``` - $ USE_AVX2=0 sh build.sh - ``` - - After compilation, the output files are in the `output/` directory. - -## FAQ - -1. `Could not transfer artifact net.sourceforge.czt.dev:cup-maven-plugin:pom:1.6-cdh from/to xxx` - - If you encounter the above error, please refer to [PR #4769](https://github.com/apache/incubator-doris/pull/4769/files) to modify the cloudera-related repo configuration in `fe/pom.xml`. - -2. The third party relies on download connection errors, failures, etc. - - The download links of the third-party libraries that Doris relies on are all in the `thirdparty/vars.sh` file. Over time, some download connections may fail. If you encounter this situation. It can be solved in the following two ways: - - 1. Manually modify the `thirdparty/vars.sh` file - - Manually modify the problematic download connection and the corresponding MD5 value. - - 2. Use a third-party download warehouse: - - ``` - export REPOSITORY_URL=https://doris-thirdparty-repo.bj.bcebos.com/thirdparty - sh build-thirdparty.sh - ``` - - REPOSITORY_URL contains all third-party library source code packages and their historical versions. - -3. `fatal error: Killed signal terminated program ...` - - If you encounter the above error when compiling with a Docker image, it may be that the memory allocated to the image is insufficient (the default memory size allocated by Docker is 2GB, and the peak memory usage during the compilation process is greater than 2GB). - - Try to increase the allocated memory of the image appropriately, 4GB ~ 8GB is recommended. - -## Special statement - -Starting from version 0.13, the dependency on the two third-party libraries [1] and [2] will be removed in the default compiled output. These two third-party libraries are under [GNU General Public License V3](https://www.gnu.org/licenses/gpl-3.0.en.html). This license is incompatible with [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0), so it should not appear in the Apache release by default. - -Remove library [1] will result in the inability to access MySQL external tables. The feature of accessing MySQL external tables will be implemented through `UnixODBC` in future release version. - -Remove library [2] will cause some data written in earlier versions (before version 0.8) to be unable to read. 
Because the data in the earlier version was compressed using the LZO algorithm, in later versions, it has been changed to the LZ4 compression algorithm. We will provide tools to detect and convert this part of the data in the future. - -If required, users can continue to use these two dependent libraries. If you want to use it, you need to add the following options when compiling: - -``` -WITH_MYSQL=1 WITH_LZO=1 sh build.sh -``` - -Note that when users rely on these two third-party libraries, Doris is not used under the Apache License 2.0 by default. Please pay attention to the GPL related agreements. - -* [1] mysql-5.7.18 -* [2] lzo-2.10 diff --git a/docs/en/installing/install-deploy.md b/docs/en/installing/install-deploy.md deleted file mode 100644 index bf572044dd..0000000000 --- a/docs/en/installing/install-deploy.md +++ /dev/null @@ -1,490 +0,0 @@ ---- -{ - "title": "Installation and deployment", - "language": "en" -} ---- - - - - -# Installation and deployment - -This document mainly introduces the hardware and software environment needed to deploy Doris, the proposed deployment mode, cluster expansion and scaling, and common problems in the process of cluster building and running. -Before reading this document, compile Doris according to the compiled document. - -## Software and hardware requirements - -### Overview - -Doris, as an open source MPP architecture OLAP database, can run on most mainstream commercial servers. In order to make full use of the concurrency advantages of MPP architecture and the high availability features of Doris, we recommend that the deployment of Doris follow the following requirements: - -#### Linux Operating System Version Requirements - -| Linux System | Version| -|---|---| -| Centos | 7.1 and above | -| Ubuntu | 16.04 and above | - -#### Software requirements - -| Soft | Version | -|---|---| -| Java | 1.8 and above | -| GCC | 4.8.2 and above | - -#### OS Installation Requirements - -##### Set the maximum number of open file handles in the system - -```` -vi /etc/security/limits.conf -*soft nofile 65536 -*hard nofile 65536 -```` - -##### Clock synchronization - -The metadata of Doris requires the time precision to be less than 5000ms, so all machines in the cluster need to synchronize the clocks to avoid service exceptions caused by inconsistencies in metadata caused by clock problems. - -##### Close the swap partition (swap) - -The Linux swap partition will cause serious performance problems for Doris, you need to disable the swap partition before installation - -##### Linux file system - -Here we recommend using the ext4 file system. When installing the operating system, please select the ext4 file system. - -#### Development Test Environment - -| Module | CPU | Memory | Disk | Network | Instance Number| -|---|---|---|---|---|---| -| Frontend | 8 core + | 8GB + | SSD or SATA, 10GB + * | Gigabit Network Card | 1| -| Backend | 8-core + | 16GB + | SSD or SATA, 50GB + * | Gigabit Network Card | 1-3*| - -#### Production environment - -| Module | CPU | Memory | Disk | Network | Number of Instances (Minimum Requirements)| -|---|---|---|---|---|---| -| Frontend | 16 core + | 64GB + | SSD or RAID card, 100GB + * | 10,000 Mbp network card | 1-5*| -| Backend | 16 core + | 64GB + | SSD or SATA, 100G + * | 10-100 Mbp network card*| - -> Note 1: -> -> 1. The disk space of FE is mainly used to store metadata, including logs and images. Usually it ranges from several hundred MB to several GB. -> 2. BE's disk space is mainly used to store user data. 
The total disk space is calculated according to the user's total data * 3 (3 copies). Then an additional 40% of the space is reserved for background compaction and some intermediate data storage. -> 3. Multiple BE instances can be deployed on a single machine, but **can only deploy one FE**. If you need three copies of data, you need at least one BE instance per machine (instead of three BE instances per machine). **Clocks of multiple FE servers must be consistent (allowing a maximum of 5 seconds clock deviation)** -> 4. The test environment can also be tested with only one BE. In the actual production environment, the number of BE instances directly determines the overall query latency. -> 5. All deployment nodes close Swap. - -> Note 2: Number of FE nodes -> -> 1. FE roles are divided into Follower and Observer. (Leader is an elected role in the Follower group, hereinafter referred to as Follower, for the specific meaning, see [Metadata Design Document](./internal/metadata-design).) -> 2. FE node data is at least 1 (1 Follower). When one Follower and one Observer are deployed, high read availability can be achieved. When three Followers are deployed, read-write high availability (HA) can be achieved. -> 3. The number of Followers **must be** odd, and the number of Observers is arbitrary. -> 4. According to past experience, when cluster availability requirements are high (e.g. providing online services), three Followers and one to three Observers can be deployed. For offline business, it is recommended to deploy 1 Follower and 1-3 Observers. - -* **Usually we recommend about 10 to 100 machines to give full play to Doris's performance (3 of them deploy FE (HA) and the rest deploy BE)** -* **Of course, Doris performance is positively correlated with the number and configuration of nodes. With a minimum of four machines (one FE, three BEs, one BE mixed with one Observer FE to provide metadata backup) and a lower configuration, Doris can still run smoothly.** -* **If FE and BE are mixed, we should pay attention to resource competition and ensure that metadata catalogue and data catalogue belong to different disks.** - -#### Broker deployment - -Broker is a process for accessing external data sources, such as hdfs. Usually, a broker instance is deployed on each machine. - -#### Network Requirements - -Doris instances communicate directly over the network. The following table shows all required ports - -| Instance Name | Port Name | Default Port | Communication Direction | Description| -| ---|---|---|---|---| -| BE | be_port | 9060 | FE --> BE | BE for receiving requests from FE| -| BE | webserver\_port | 8040 | BE <--> BE | BE| -| BE | heartbeat\_service_port | 9050 | FE --> BE | the heart beat service port (thrift) on BE, used to receive heartbeat from FE| -| BE | brpc\_port | 8060 | FE <--> BE, BE <--> BE | BE for communication between BEs| -| FE | http_port | 8030 | FE <--> FE, user <--> FE | HTTP server port on FE | -| FE | rpc_port | 9020 | BE --> FE, FE <--> FE | thrift server port on FE, the configuration of each fe needs to be consistent| -| FE | query_port | 9030 | user <--> FE | FE| -| FE | edit\_log_port | 9010 | FE <--> FE | FE| -| Broker | broker ipc_port | 8000 | FE --> Broker, BE --> Broker | Broker for receiving requests| - -> Note: -> -> 1. When deploying multiple FE instances, make sure that the http port configuration of FE is the same. -> 2. Make sure that each port has access in its proper direction before deployment. 
- -#### IP binding - -Because of the existence of multiple network cards, or the existence of virtual network cards caused by the installation of docker and other environments, the same host may have multiple different ips. Currently Doris does not automatically identify available IP. So when you encounter multiple IP on the deployment host, you must force the correct IP to be specified through the priority\_networks configuration item. - -Priority\_networks is a configuration that both FE and BE have, and the configuration items need to be written in fe.conf and be.conf. This configuration item is used to tell the process which IP should be bound when FE or BE starts. Examples are as follows: - -`priority_networks=10.1.3.0/24` - -This is a representation of [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing). FE or BE will find the matching IP based on this configuration item as their own local IP. - -**Note**: When priority networks is configured and FE or BE is started, only the correct IP binding of FE or BE is ensured. In ADD BACKEND or ADD FRONTEND statements, you also need to specify IP matching priority networks configuration, otherwise the cluster cannot be established. Give an example: - -BE is configured as `priority_networks = 10.1.3.0/24'.`. - -When you want to ADD BACKEND use: `ALTER SYSTEM ADD BACKEND "192.168.0.1:9050";` - -Then FE and BE will not be able to communicate properly. - -At this point, DROP must remove the BE that added errors and re-use the correct IP to perform ADD BACKEND. - -FE is the same. - -BROKER does not currently have, nor does it need, priority\_networks. Broker's services are bound to 0.0.0 by default. Simply execute the correct accessible BROKER IP when ADD BROKER is used. - -#### Table Name Case Sensitivity Setting - -By default, doris is case-sensitive. If there is a need for case-insensitive table names, you need to set it before cluster initialization. The table name case sensitivity cannot be changed after cluster initialization is completed. - -See the section on `lower_case_table_names` variables in [Variables](../administrator-guide/variables.md) for details. - -## Cluster deployment - -### Manual deployment - -#### Deploy FE - -* Copy the FE deployment file to the specified node - - Copy the Fe folder under output generated by source code compilation to the node specified deployment path of FE and enter this directory. - -* Configure FE - - 1. The configuration file is conf/fe.conf. Note: `meta_dir` indicates the Metadata storage location. The default value is `${DORIS_HOME}/doris-meta`. The directory needs to be **created manually**. - 2. JAVA_OPTS in fe.conf defaults to a maximum heap memory of 4GB for java, and it is recommended that the production environment be adjusted to more than 8G. - -* Start FE - - `bin/start_fe.sh --daemon` - - The FE process starts and enters the background execution. Logs are stored in the log/ directory by default. If startup fails, you can view error messages by looking at log/fe.log or log/fe.out. - -* For deployment of multiple FEs, see the section "FE scaling and downsizing" - -#### Deploy BE - -* Copy BE deployment files to all nodes to deploy BE - - Copy the be folder under output generated by source code compilation to the specified deployment path of the BE node. - - > Note: The `output/be/lib/debug_info/` directory is for debug information files, the file size is big, but they are not needed ar runtime and can be deployed without them. 
- -* Modify all BE configurations - - Modify be/conf/be.conf. Mainly configure `storage_root_path`: data storage directory. The default is be/storage, this directory needs to be **created manually** by. In multi directories case, using `;` separation (do not add `;` after the last directory). - - eg.1: - - Note: For SSD disks, '.SSD 'is followed by the directory, and for HDD disks,'.HDD 'is followed by the directory - - `storage_root_path=/home/disk1/doris.HDD,50;/home/disk2/doris.SSD,1;/home/disk2/doris` - - **instructions** - - * 1./home/disk1/doris.HDD,50, indicates capacity limit is 50GB, HDD; - * 2./home/disk2/doris.SSD,1, indicates capacity limit is 1GB, SSD; - * 3./home/disk2/doris, indicates capacity limit is disk capacity, HDD(default) - - eg.2: - - Note: you do not need to add the suffix to either HDD or SSD disk directories. You only need to set the medium parameter - - `storage_root_path=/home/disk1/doris,medium:hdd,capacity:50;/home/disk2/doris,medium:ssd,capacity:50` - - **instructions** - - * 1./home/disk1/doris,medium:hdd,capacity:10,capacity limit is 10GB, HDD; - * 2./home/disk2/doris,medium:ssd,capacity:50,capacity limit is 50GB, SSD; - -* BE webserver_port configuration - - If the Be componet is installed in hadoop cluster , need to change configuration `webserver_port=8040` to avoid port used. - -* Add all BE nodes to FE - - BE nodes need to be added in FE before they can join the cluster. You can use mysql-client([Download MySQL 5.7](https://dev.mysql.com/downloads/mysql/5.7.html)) to connect to FE: - - `./mysql-client -h fe_host -P query_port -uroot` - - The fe_host is the node IP where FE is located; the query_port in fe/conf/fe.conf; the root account is used by default and no password is used to login. - - After login, execute the following commands to add each BE: - - `ALTER SYSTEM ADD BACKEND "be_host:heartbeat_service_port";` - - The be_host is the node IP where BE is located; the heartbeat_service_port in be/conf/be.conf. - -* Start BE - - `bin/start_be.sh --daemon` - - The BE process will start and go into the background for execution. Logs are stored in be/log/directory by default. If startup fails, you can view error messages by looking at be/log/be.log or be/log/be.out. - -* View BE status - - Connect to FE using mysql-client and execute `SHOW PROC '/backends'; `View BE operation. If everything is normal, the `Alive`column should be `true`. - -#### (Optional) FS_Broker deployment - -Broker is deployed as a plug-in, independent of Doris. If you need to import data from a third-party storage system, you need to deploy the corresponding Broker. By default, it provides fs_broker to read HDFS ,Baidu cloud BOS and Amazon S3. Fs_broker is stateless and it is recommended that each FE and BE node deploy a Broker. - -* Copy the corresponding Broker directory in the output directory of the source fs_broker to all the nodes that need to be deployed. It is recommended to maintain the same level as the BE or FE directories. - -* Modify the corresponding Broker configuration - - In the corresponding broker/conf/directory configuration file, you can modify the corresponding configuration. - -* Start Broker - - `bin/start_broker.sh --daemon` - -* Add Broker - - To let Doris FE and BE know which nodes Broker is on, add a list of Broker nodes by SQL command. 
- - Use mysql-client to connect the FE started, and execute the following commands: - - `ALTER SYSTEM ADD BROKER broker_name "broker_host1:broker_ipc_port1","broker_host2:broker_ipc_port2",...;` - - The broker\_host is Broker's node ip; the broker_ipc_port is in the Broker configuration file. - -* View Broker status - - Connect any booted FE using mysql-client and execute the following command to view Broker status: `SHOW PROC '/brokers';` - -**Note: In production environments, daemons should be used to start all instances to ensure that processes are automatically pulled up after they exit, such as [Supervisor](http://supervisord.org/). For daemon startup, in 0.9.0 and previous versions, you need to modify the start_xx.sh scripts to remove the last & symbol**. Starting with version 0.10.0, call `sh start_xx.sh` directly to start. Also refer to [here](https://www.cnblogs.com/lenmom/p/9973401.html) - -## Expansion and contraction - -Doris can easily expand and shrink FE, BE, Broker instances. - -### FE Expansion and Compression - -High availability of FE can be achieved by expanding FE to three top-one nodes. - -Users can login to Master FE through MySQL client. By: - -`SHOW PROC '/frontends';` - -To view the current FE node situation. - -You can also view the FE node through the front-end page connection: ``http://fe_hostname:fe_http_port/frontend`` or ```http://fe_hostname:fe_http_port/system?Path=//frontends```. - -All of the above methods require Doris's root user rights. - -The process of FE node expansion and contraction does not affect the current system operation. - -#### Adding FE nodes - -FE is divided into three roles: Leader, Follower and Observer. By default, a cluster can have only one Leader and multiple Followers and Observers. Leader and Follower form a Paxos selection group. If the Leader goes down, the remaining Followers will automatically select a new Leader to ensure high write availability. Observer synchronizes Leader data, but does not participate in the election. If only one FE is deployed, FE defaults to Leader. - -The first FE to start automatically becomes Leader. On this basis, several Followers and Observers can be added. - -Add Follower or Observer. Connect to the started FE using mysql-client and execute: - -`ALTER SYSTEM ADD FOLLOWER "follower_host:edit_log_port";` - -or - -`ALTER SYSTEM ADD OBSERVER "observer_host:edit_log_port";` - -The follower\_host and observer\_host is the node IP of Follower or Observer, and the edit\_log\_port in its configuration file fe.conf. - -Configure and start Follower or Observer. Follower and Observer are configured with Leader. The following commands need to be executed at the first startup: - -`bin/start_fe.sh --helper host:edit_log_port --daemon` - -The host is the node IP of Leader, and the edit\_log\_port in Lead's configuration file fe.conf. The --helper is only required when follower/observer is first startup. - -View the status of Follower or Observer. Connect to any booted FE using mysql-client and execute: - -```SHOW PROC '/frontends';``` - -You can view the FE currently joined the cluster and its corresponding roles. - -> Notes for FE expansion: -> -> 1. The number of Follower FEs (including Leaders) must be odd. It is recommended that a maximum of three constituent high availability (HA) modes be deployed. -> 2. When FE is in a highly available deployment (1 Leader, 2 Follower), we recommend that the reading service capability of FE be extended by adding Observer FE. 
Of course, you can continue to add Follower FE, but it's almost unnecessary. -> 3. Usually a FE node can handle 10-20 BE nodes. It is suggested that the total number of FE nodes should be less than 10. Usually three can meet most of the needs. -> 4. The helper cannot point to the FE itself, it must point to one or more existing running Master/Follower FEs. - -#### Delete FE nodes - -Delete the corresponding FE node using the following command: - -```ALTER SYSTEM DROP FOLLOWER[OBSERVER] "fe_host:edit_log_port";``` - -> Notes for FE contraction: -> -> 1. When deleting Follower FE, make sure that the remaining Follower (including Leader) nodes are odd. - -### BE Expansion and Compression - -Users can login to Leader FE through mysql-client. By: - -```SHOW PROC '/backends';``` - -To see the current BE node situation. - -You can also view the BE node through the front-end page connection: ``http://fe_hostname:fe_http_port/backend`` or ``http://fe_hostname:fe_http_port/system?Path=//backends``. - -All of the above methods require Doris's root user rights. - -The expansion and scaling process of BE nodes does not affect the current system operation and the tasks being performed, and does not affect the performance of the current system. Data balancing is done automatically. Depending on the amount of data available in the cluster, the cluster will be restored to load balancing in a few hours to a day. For cluster load, see the [Tablet Load Balancing Document](../administrator-guide/operation/tablet-repair-and-balance.md). - -#### Add BE nodes - -The BE node is added in the same way as in the **BE deployment** section. The BE node is added by the `ALTER SYSTEM ADD BACKEND` command. - -> Notes for BE expansion: -> -> 1. After BE expansion, Doris will automatically balance the data according to the load, without affecting the use during the period. - -#### Delete BE nodes - -There are two ways to delete BE nodes: DROP and DECOMMISSION - -The DROP statement is as follows: - -```ALTER SYSTEM DROP BACKEND "be_host:be_heartbeat_service_port";``` - -**Note: DROP BACKEND will delete the BE directly and the data on it will not be recovered!!! So we strongly do not recommend DROP BACKEND to delete BE nodes. When you use this statement, there will be corresponding error-proof operation hints.** - -DECOMMISSION clause: - -```ALTER SYSTEM DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";``` - -> DECOMMISSION notes: -> -> 1. This command is used to safely delete BE nodes. After the command is issued, Doris attempts to migrate the data on the BE to other BE nodes, and when all data is migrated, Doris automatically deletes the node. -> 2. The command is an asynchronous operation. After execution, you can see that the BE node's isDecommission status is true through ``SHOW PROC '/backends';` Indicates that the node is offline. -> 3. The order **does not necessarily carry out successfully**. For example, when the remaining BE storage space is insufficient to accommodate the data on the offline BE, or when the number of remaining machines does not meet the minimum number of replicas, the command cannot be completed, and the BE will always be in the state of isDecommission as true. -> 4. The progress of DECOMMISSION can be viewed through `SHOW PROC '/backends';` Tablet Num, and if it is in progress, Tablet Num will continue to decrease. -> 5. The operation can be carried out by: -> ```CANCEL ALTER SYSTEM DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";``` -> The order was cancelled. 
When cancelled, the data on the BE will maintain the current amount of data remaining. Follow-up Doris re-load balancing - -**For expansion and scaling of BE nodes in multi-tenant deployment environments, please refer to the [Multi-tenant Design Document] (./administrator-guide/operation/multi-tenant.md).** - -### Broker Expansion and Shrinkage - -There is no rigid requirement for the number of Broker instances. Usually one physical machine is deployed. Broker addition and deletion can be accomplished by following commands: - -```ALTER SYSTEM ADD BROKER broker_name "broker_host:broker_ipc_port";``` -```ALTER SYSTEM DROP BROKER broker_name "broker_host:broker_ipc_port";``` -```ALTER SYSTEM DROP ALL BROKER broker_name;``` - -Broker is a stateless process that can be started or stopped at will. Of course, when it stops, the job running on it will fail. Just try again. - -## Common Questions - -### Process correlation - -1. How to determine the success of FE process startup - - After the FE process starts, metadata is loaded first. According to the different roles of FE, you can see ```transfer from UNKNOWN to MASTER/FOLLOWER/OBSERVER```in the log. Eventually, you will see the ``thrift server started`` log and connect to FE through MySQL client, which indicates that FE started successfully. - - You can also check whether the startup was successful by connecting as follows: - - `http://fe_host:fe_http_port/api/bootstrap` - - If returned: - - `{"status":"OK","msg":"Success"}` - - The startup is successful, there may be problems in other cases. - - > Note: If you can't see the information of boot failure in fe. log, you may see it in fe. out. - -2. How to determine the success of BE process startup - - After the BE process starts, if there is data before, there may be several minutes of data index loading time. - - If BE is started for the first time or the BE has not joined any cluster, the BE log will periodically scroll the words `waiting to receive first heartbeat from frontend`. BE has not received Master's address through FE's heartbeat and is waiting passively. This error log will disappear after ADD BACKEND in FE sends the heartbeat. If the word `````master client', get client from cache failed. host:, port: 0, code: 7````` master client appears again after receiving heartbeat, it indicates that FE has successfully connected BE, but BE cannot actively connect FE. It may be necessary to check the connectivity of rpc_port from BE to FE. - - If BE has been added to the cluster, the heartbeat log from FE should be scrolled every five seconds: ```get heartbeat, host:xx. xx.xx.xx, port:9020, cluster id:xxxxxxx```, indicating that the heartbeat is normal. - - Secondly, the word `finish report task success. return code: 0` should be scrolled every 10 seconds in the log to indicate that BE's communication to FE is normal. - - At the same time, if there is a data query, you should see the rolling logs, and have `execute time is xxx` logs, indicating that BE started successfully, and the query is normal. - - You can also check whether the startup was successful by connecting as follows: - - `http://be_host:be_http_port/api/health` - - If returned: - - `{"status": "OK","msg": "To Be Added"}` - - If the startup is successful, there may be problems in other cases. - - > Note: If you can't see the information of boot failure in be.INFO, you may see it in be.out. - -3. 
How to determine the normal connectivity of FE and BE after building the system - - Firstly, confirm that FE and BE processes have been started separately and normally, and confirm that all nodes have been added through `ADD BACKEND` or `ADD FOLLOWER/OBSERVER` statements. - - If the heartbeat is normal, BE logs will show ``get heartbeat, host:xx.xx.xx.xx, port:9020, cluster id:xxxxx`` If the heartbeat fails, the words ```backend [10001] get Exception: org.apache.thrift.transport.TTransportException``` will appear in FE's log, or other thrift communication abnormal log, indicating that the heartbeat fails from FE to 10001 BE. Here you need to check the connectivity of FE to BE host's heart-beating port. - - If BE's communication to FE is normal, the BE log will display the words `finish report task success. return code: 0`. Otherwise, the words `master client`, get client from cache failed` will appear. In this case, the connectivity of BE to the rpc_port of FE needs to be checked. - -4. Doris Node Authentication Mechanism - - In addition to Master FE, the other role nodes (Follower FE, Observer FE, Backend) need to register to the cluster through the `ALTER SYSTEM ADD` statement before joining the cluster. - - When Master FE is first started, a cluster_id is generated in the doris-meta/image/VERSION file. - - When FE first joins the cluster, it first retrieves the file from Master FE. Each subsequent reconnection between FEs (FE reboot) checks whether its cluster ID is the same as that of other existing FEs. If different, the FE will exit automatically. - - When BE first receives the heartbeat of Master FE, it gets the cluster ID from the heartbeat and records it in the `cluster_id` file of the data directory. Each heartbeat after that compares to the cluster ID sent by FE. If cluster IDs are not equal, BE will refuse to respond to FE's heartbeat. - - The heartbeat also contains Master FE's ip. When FE cuts the master, the new Master FE will carry its own IP to send the heartbeat to BE, BE will update its own saved Master FE ip. - - > **priority\_network** - > - > priority network is that both FE and BE have a configuration. Its main purpose is to assist FE or BE to identify their own IP addresses in the case of multi-network cards. Priority network is represented by CIDR: [RFC 4632](https://tools.ietf.org/html/rfc4632) - > - > When the connectivity of FE and BE is confirmed to be normal, if the table Timeout still occurs, and the FE log has an error message with the words `backend does not find. host:xxxx.xxx.XXXX`. This means that there is a problem with the IP address that Doris automatically identifies and that priority\_network parameters need to be set manually. - > - > The main reason for this problem is that when the user adds BE through the `ADD BACKEND` statement, FE recognizes whether the statement specifies hostname or IP. If it is hostname, FE automatically converts it to an IP address and stores it in metadata. When BE reports on the completion of the task, it carries its own IP address. If FE finds that BE reports inconsistent IP addresses and metadata, it will make the above error. - > - > Solutions to this error: 1) Set **priority\_network** parameters in FE and BE respectively. Usually FE and BE are in a network segment, so this parameter can be set to the same. 2) Fill in the `ADD BACKEND` statement directly with the correct IP address of BE instead of hostname to avoid FE getting the wrong IP address. - -5. 
File descriptor number of BE process - - The number of file descriptor of BE process is controlled by the two parameters min_file_descriptor_number/max_file_descriptor_number. - - If it is not in the [min_file_descriptor_number, max_file_descriptor_number] interval, error will occurs when starting BE process. - - Please using ulimit command to set file descriptor under this circumstance. - - The default value of min_file_descriptor_number is 65536. - - The default value of max_file_descriptor_number is 131072. - - For Example: ulimit -n 65536; this command set file descriptor to 65536. - - After starting BE process, you can use **cat /proc/$pid/limits** to see the actual limit of process. diff --git a/docs/en/installing/upgrade.md b/docs/en/installing/upgrade.md deleted file mode 100644 index f5d6252571..0000000000 --- a/docs/en/installing/upgrade.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -{ - "title": "Cluster upgrade", - "language": "en" -} ---- - - - - -# Cluster upgrade - -Doris can upgrade smoothly by rolling upgrades. The following steps are recommended for security upgrade. - -> **Note:** -> 1. Doris does not support upgrading across two-digit version numbers, for example: you cannot upgrade directly from 0.13 to 0.15, only through 0.13.x -> 0.14.x -> 0.15.x, and the three-digit version number can be upgraded across versions, such as from 0.13 .15 can be directly upgraded to 0.14.13.1, it is not necessary to upgrade 0.14.7 or 0.14.12.1 -> 1. The following approaches are based on highly available deployments. That is, data 3 replicas, FE high availability. - -## Preparen - -1. Turn off the replica repair and balance operation. - - There will be node restarts during the upgrade process, so unnecessary cluster balancing and replica repair logic may be triggered. You can close it first with the following command: - - ``` - # Turn off the replica ealance logic. After it is closed, the balancing operation of the ordinary table replica will no longer be triggered. - $ mysql-client> admin set frontend config("disable_balance" = "true"); - - # Turn off the replica balance logic of the colocation table. After it is closed, the replica redistribution operation of the colocation table will no longer be triggered. - $ mysql-client> admin set frontend config("disable_colocate_balance" = "true"); - - # Turn off the replica scheduling logic. After shutting down, all generated replica repair and balancing tasks will no longer be scheduled. - $ mysql-client> admin set frontend config("disable_tablet_scheduler" = "true"); - ``` - - After the cluster is upgraded, just use the above command to set the corresponding configuration to the original value. - -2. **important! ! Metadata needs to be backed up before upgrading(The entire directory needs to be backed up)! !** - -## Test the correctness of BE upgrade - -1. Arbitrarily select a BE node and deploy the latest palo_be binary file. -2. Restart the BE node and check the BE log be.INFO to see if the boot was successful. -3. If the startup fails, you can check the reason first. If the error is not recoverable, you can delete the BE directly through DROP BACKEND, clean up the data, and restart the BE using the previous version of palo_be. Then re-ADD BACKEND. (**This method will result in the loss of a copy of the data, please make sure that three copies are complete, and perform this operation!!!** - -## Testing FE Metadata Compatibility - -0. **Important! Exceptional metadata compatibility is likely to cause data cannot be restored!!** -1. 
Deploy a test FE process (such as your own local developer) using the new version alone. -2. Modify the FE configuration file fe.conf for testing and set all ports to **different from online**. -3. Add configuration in fe.conf: cluster_id=123456 -4. Add the configuration in fe.conf: metadatafailure_recovery=true -5. Copy the metadata directory doris-meta of the online environment Master FE to the test environment -6. Modify the cluster_id in the doris-meta/image/VERSION file copied into the test environment to 123456 (that is, the same as in Step 3) -7. run sh bin/start_fe.sh in the test environment. -8. Observe whether the start-up is successful through FE log fe.log. -9. If the startup is successful, run sh bin/stop_fe.sh to stop the FE process of the test environment. -10. **The purpose of the above 2-6 steps is to prevent the FE of the test environment from being misconnected to the online environment after it starts.** - -## Upgrade preparation - -1. After data validation, the new version of BE and FE binary files are distributed to their respective directories. -2. Usually small version upgrade, BE only needs to upgrade palo_be; FE only needs to upgrade palo-fe.jar. If it is a large version upgrade, you may need to upgrade other files (including but not limited to bin / lib / etc.) If you are not sure whether you need to replace other files, it is recommended to replace all of them. - -## rolling upgrade - -1. Confirm that the new version of the file is deployed. Restart FE and BE instances one by one. -2. It is suggested that BE be restarted one by one and FE be restarted one by one. Because Doris usually guarantees backward compatibility between FE and BE, that is, the old version of FE can access the new version of BE. However, the old version of BE may not be supported to access the new version of FE. -3. It is recommended to restart the next instance after confirming the previous instance started successfully. Refer to the Installation Deployment Document for the identification of successful instance startup. diff --git a/docs/en/internal/Flink doris connector Design.md b/docs/en/internal/Flink doris connector Design.md deleted file mode 100644 index 05481c67bf..0000000000 --- a/docs/en/internal/Flink doris connector Design.md +++ /dev/null @@ -1,259 +0,0 @@ ---- -{ - "title": "Flink doris connector Design", - "language": "en" -} - - ---- - - - - -# Doris Storage File Format Optimization # - -## File format ## - -![](/images/segment_v2.png) -
1. Doris segment
- -Documents include: -- The file starts with an 8-byte magic code to identify the file format and version -- Data Region: Used to store data information for each column, where the data is loaded on demand by pages. -- Index Region: Doris stores the index data of each column in Index Region, where the data is loaded according to column granularity, so the data information of the following column is stored separately. -- Footer - - FileFooterPB: Metadata Information for Definition Files - - Checksum of 4 bytes of footer Pb content - - Four bytes FileFooterPB message length for reading FileFooterPB - - The 8 byte MAGIC CODE is stored in the last bit to facilitate the identification of file types in different scenarios. - -The data in the file is organized in the form of page, which is the basic unit of coding and compression. Current page types include the following: - -### DataPage ### - -Data Page is divided into two types: nullable and non-nullable data pages. - -Nullable's data page includes: -``` - - +----------------+ - | value count | - |----------------| - | first row id | - |----------------| - | bitmap length | - |----------------| - | null bitmap | - |----------------| - | data | - |----------------| - | checksum | - +----------------+ -``` - -non -zero data page32467;- 26500;- 229140;- - -``` - |----------------| - | value count | - |----------------| - | first row id | - |----------------| - | data | - |----------------| - | checksum | - +----------------+ -``` - -The meanings of each field are as follows: - -- value count - - Represents the number of rows in a page -- First row id - - Line number of the first line in page -- bitmap length - - Represents the number of bytes in the next bitmap -- null bitmap - - bitmap representing null information -- Data - - Store data after encoding and compress - - You need to write in the header information of the data: is_compressed - - Various kinds of data encoded by different codes need to write some field information in the header information in order to achieve data parsing. - - TODO: Add header information for various encodings -- Checksum - - Store page granularity checksum, including page header and subsequent actual data - - -### Bloom Filter Pages ### - -For each bloom filter column, a page of the bloom filter is generated corresponding to the granularity of the page and saved in the bloom filter pages area. - -### Ordinal Index Page ### - -For each column, a sparse index of row numbers is established according to page granularity. The content is a pointer to the block (including offset and length) for the line number of the start line of the page - -### Short Key Index page ### - -We generate a sparse index of short key every N rows (configurable) with the contents of short key - > line number (ordinal) - -### Column's other indexes ### - -The format design supports the subsequent expansion of other index information, such as bitmap index, spatial index, etc. It only needs to write the required data to the existing column data, and add the corresponding metadata fields to FileFooterPB. 
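To make the sparse indexes above more concrete, here is a minimal sketch of how a short key lookup could narrow the scan range: one short key is sampled every N rows, a binary search over the samples gives a conservative starting ordinal, and the ordinal index can then map that ordinal to a page. This is an illustration only (the granularity N, the sample data and the function name are invented), not the Doris implementation:

```
import bisect

# One sparse entry is kept every N rows; the values below are made up.
# Each entry maps the short key of the first row in a block of N rows to
# that row's ordinal (line number).
N = 1024
sparse_index = [("aaa", 0), ("foo", 1 * N), ("mno", 2 * N), ("xyz", 3 * N)]
sampled_keys = [key for key, _ in sparse_index]

def scan_start_ordinal(search_key):
    """Return a safe ordinal at which a scan for search_key can begin.

    bisect_left finds the first sampled key >= search_key; starting one block
    earlier than that is conservative, and also covers rows equal to a sampled
    key that spill over from the previous block.
    """
    pos = bisect.bisect_left(sampled_keys, search_key) - 1
    if pos < 0:
        return 0
    return sparse_index[pos][1]

print(scan_start_ordinal("moon"))  # 2048: the scan starts in the "mno" block
print(scan_start_ordinal("abc"))   # 0: before "foo", so start from the first block
```

In the actual format the resulting ordinal would be translated into a page pointer through the ordinal index described above, and further pruned with zone maps or bitmap indexes as outlined in the Read section below.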
- -### Metadata Definition ### -SegmentFooterPB is defined as: - -``` -message ColumnPB { - required int32 unique_id = 1; // The column id is used here, and the column name is not used - optional string name = 2; // Column name, when name equals __DORIS_DELETE_SIGN__, this column is a hidden delete column - required string type = 3; // Column type - optional bool is_key = 4; // Whether column is a primary key column - optional string aggregation = 5; // Aggregate type - optional bool is_nullable = 6; // Whether column is allowed to assgin null - optional bytes default_value = 7; // Defalut value - optional int32 precision = 8; // Precision of column - optional int32 frac = 9; - optional int32 length = 10; // Length of column - optional int32 index_length = 11; // Length of column index - optional bool is_bf_column = 12; // Whether column has bloom filter index - optional bool has_bitmap_index = 15 [default=false]; // Whether column has bitmap index -} - -// page offset -message PagePointerPB { - required uint64 offset; // offset of page in segment file - required uint32 length; // length of page -} - -message MetadataPairPB { - optional string key = 1; - optional bytes value = 2; -} - -message ColumnMetaPB { - optional ColumnMessage encoding; // Encoding of column - - optional PagePointerPB dict_page // Dictionary page - repeated PagePointerPB bloom_filter_pages; // Bloom filter pages - optional PagePointerPB ordinal_index_page; // Ordinal index page - optional PagePointerPB page_zone_map_page; // Page level of statistics index data - - optional PagePointerPB bitmap_index_page; // Bitmap index page - - optional uint64 data_footprint; // The size of the index in the column - optional uint64 index_footprint; // The size of the data in the column - optional uint64 raw_data_footprint; // Original column data size - - optional CompressKind compress_kind; // Column compression type - - optional ZoneMapPB column_zone_map; // Segment level of statistics index data - repeated MetadataPairPB column_meta_datas; -} - -message SegmentFooterPB { - optional uint32 version = 2 [default = 1]; // For version compatibility and upgrade use - repeated ColumnPB schema = 5; // Schema of columns - optional uint64 num_values = 4; // Number of lines saved in the file - optional uint64 index_footprint = 7; // Index size - optional uint64 data_footprint = 8; // Data size - optional uint64 raw_data_footprint = 8; // Original data size - - optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // Compression type - repeated ColumnMetaPB column_metas = 10; // Column metadata - optional PagePointerPB key_index_page = 11; // short key index page -} - -``` - -## Read-write logic ## - -### Write ### - -The general writing process is as follows: -1. Write magic -2. Generate corresponding Column Writer according to schema information. Each Column Writer obtains corresponding encoding information (configurable) according to different types, and generates corresponding encoder according to encoding. -3. Call encoder - > add (value) for data writing. Each K line generates a short key index entry, and if the current page satisfies certain conditions (the size exceeds 1M or the number of rows is K), a new page is generated and cached in memory. -4. Continuous cycle step 3 until data writing is completed. Brush the data of each column into the file in sequence -5. Generate FileFooterPB information and write it to the file. - -Relevant issues: - -- How does the index of short key be generated? 
- - Now we still generate a short key sparse index according to how many rows are sparse, and keep a short sparse index generated every 1024 rows. The specific content is: short key - > ordinal - -- What should be stored in the ordinal index? - - Store the first ordinal to page pointer mapping information for pages -- What are stored in pages of different encoding types? - - Dictionary Compression - - plain - - rle - - bshuf - -### Read ### - -1. Read the magic of the file and judge the type and version of the file. -2. Read FileFooterPB and check sum -3. Read short key index and data ordinal index information of corresponding columns according to required columns -4. Use start key and end key, locate the row number to be read through short key index, then determine the row ranges to be read through ordinal index, and filter the row ranges to be read through statistics, bitmap index and so on. -5. Then read row data through ordinal index according to row ranges - -Relevant issues: -1. How to quickly locate a row within the page? - - The data inside the page is encoding, so it cannot locate the row-level data quickly. Different encoding methods have different schemes for fast line number positioning in-house, which need to be analyzed concretely: - - If it is rle-coded, skip is performed by resolving the head of RLE until the RLE block containing the row is reached, and then the reverse solution is performed. - - binary plain encoding: offset information will be stored in the page, and offset information will be specified in the page header. When reading, offset information will be parsed into the array first, so that you can quickly locate the data of a row of block through offset data information of each row. -2. How to achieve efficient block reading? Consider merging adjacent blocks while they are being read, one-time reading? -This requires judging whether the block is continuous at the time of reading, and if it is continuous, reading it once. - -## Coding ## - -In the existing Doris storage, plain encoding is adopted for string type encoding, which is inefficient. After comparison, it is found that in Baidu statistics scenario, data will expand more than twice because of string type coding. Therefore, it is planned to introduce dictionary-based coding compression. - -## Compression ## - -It implements a scalable compression framework, supports a variety of compression algorithms, facilitates the subsequent addition of new compression algorithms, and plans to introduce zstd compression. - -## TODO ## -1. How to implement nested types? How to locate line numbers in nested types? -2. How to optimize the downstream bitmap and column statistics caused by ScanRange splitting? diff --git a/docs/en/internal/grouping_sets_design.md b/docs/en/internal/grouping_sets_design.md deleted file mode 100644 index 16acc33997..0000000000 --- a/docs/en/internal/grouping_sets_design.md +++ /dev/null @@ -1,501 +0,0 @@ ---- -{ - "title": "GROUPING SETS DESIGN", - "language": "en" -} ---- - - -# GROUPING SETS DESIGN - -## 1. GROUPING SETS Background - -The `CUBE`, `ROLLUP`, and `GROUPING` `SETS` extensions to SQL make querying and reporting easier and faster. `CUBE`, `ROLLUP`, and grouping sets produce a single result set that is equivalent to a `UNION` `ALL` of differently grouped rows. `ROLLUP` calculates aggregations such as `SUM`, `COUNT`, `MAX`, `MIN`, and `AVG` at increasing levels of aggregation, from the most detailed up to a grand total. 
`CUBE` is an extension similar to `ROLLUP`, enabling a single statement to calculate all possible combinations of aggregations. The `CUBE`, `ROLLUP`, and the `GROUPING` `SETS` extension lets you specify just the groupings needed in the `GROUP` `BY` clause. This allows efficient analysis across multiple dimensions without performing a `CUBE` operation. Computing a `CUBE` creates a heavy processing load, so replacing cubes with grouping sets can significantly increase performance. -To enhance performance, `CUBE`, `ROLLUP`, and `GROUPING SETS` can be parallelized: multiple processes can simultaneously execute all of these statements. These capabilities make aggregate calculations more efficient, thereby enhancing database performance, and scalability. - -The three `GROUPING` functions help you identify the group each row belongs to and enable sorting subtotal rows and filtering results. - -### 1.1 GROUPING SETS Syntax - -`GROUPING SETS` syntax lets you define multiple groupings in the same query. `GROUP BY` computes all the groupings specified and combines them with `UNION ALL`. For example, consider the following statement: - -``` -SELECT k1, k2, SUM( k3 ) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k1), (k2), ( ) ); -``` - - -This statement is equivalent to: - -``` -SELECT k1, k2, SUM( k3 ) FROM t GROUP BY k1, k2 -UNION -SELECT k1, null, SUM( k3 ) FROM t GROUP BY k1 -UNION -SELECT null, k2, SUM( k3 ) FROM t GROUP BY k2 -UNION -SELECT null, null, SUM( k3 ) FROM t -``` - -This is an example of real query: - -``` -mysql> SELECT * FROM t; -+------+------+------+ -| k1 | k2 | k3 | -+------+------+------+ -| a | A | 1 | -| a | A | 2 | -| a | B | 1 | -| a | B | 3 | -| b | A | 1 | -| b | A | 4 | -| b | B | 1 | -| b | B | 5 | -+------+------+------+ -8 rows in set (0.01 sec) - -mysql> SELECT k1, k2, SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ); -+------+------+-----------+ -| k1 | k2 | sum(`k3`) | -+------+------+-----------+ -| b | B | 6 | -| a | B | 4 | -| a | A | 3 | -| b | A | 5 | -| NULL | B | 10 | -| NULL | A | 8 | -| a | NULL | 7 | -| b | NULL | 11 | -| NULL | NULL | 18 | -+------+------+-----------+ -9 rows in set (0.06 sec) -``` - -### 1.2 ROLLUP Syntax - -`ROLLUP` enables a `SELECT` statement to calculate multiple levels of subtotals across a specified group of dimensions. It also calculates a grand total. `ROLLUP` is a simple extension to the `GROUP` `BY` clause, so its syntax is extremely easy to use. The `ROLLUP` extension is highly efficient, adding minimal overhead to a query. - -`ROLLUP` appears in the `GROUP` `BY` clause in a `SELECT` statement. Its form is: - -``` -SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY ROLLUP(a,b,c) -``` - -This statement is equivalent to GROUPING SETS as followed: - -``` -GROUPING SETS ( -(a,b,c), -( a, b ), -( a), -( ) -) -``` - -### 1.3 CUBE Syntax - -Like `ROLLUP` `CUBE` generates all the subtotals that could be calculated for a data cube with the specified dimensions. - -``` -SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY CUBE(a,b,c) -``` - -e.g. CUBE ( a, b, c ) is equivalent to GROUPING SETS as followed: - -``` -GROUPING SETS ( -( a, b, c ), -( a, b ), -( a, c ), -( a ), -( b, c ), -( b ), -( c ), -( ) -) -``` - -### 1.4 GROUPING and GROUPING_ID Function - -Indicates whether a specified column expression in a `GROUP BY` list is aggregated or not. `GROUPING `returns 1 for aggregated or 0 for not aggregated in the result set. `GROUPING` can be used only in the `SELECT` list, `HAVING`, and `ORDER BY` clauses when `GROUP BY` is specified. 
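Both `GROUPING` and the `GROUPING_ID` function described next reduce to simple bit arithmetic over which columns of the `GROUP BY` list are aggregated away in a given output row. The following sketch is illustrative Python, not Doris code; the column names are taken from the examples in this document, and it reproduces the values shown in the result tables below:

```
def grouping(column, grouping_set):
    """GROUPING(column): 0 if the column is grouped in this grouping set, else 1."""
    return 0 if column in grouping_set else 1

def grouping_id(columns, grouping_set):
    """GROUPING_ID(c1, ..., cN): concatenate the GROUPING bits, c1 as the highest bit."""
    value = 0
    for column in columns:
        value = (value << 1) | grouping(column, grouping_set)
    return value

# Grouping sets from the example query: (k1, k2), (k1), (k2), ()
for gs in [{"k1", "k2"}, {"k1"}, {"k2"}, set()]:
    print(sorted(gs), grouping_id(["k1", "k2"], gs))
# -> ['k1', 'k2'] 0, ['k1'] 1, ['k2'] 2, [] 3
```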
- -`GROUPING_ID` describes which of a list of expressions are grouped in a row produced by a `GROUP BY` query. The `GROUPING_ID` function simply returns the decimal equivalent of the binary value formed as a result of the concatenation of the values returned by the `GROUPING` functions. - -Each `GROUPING_ID` argument must be an element of the `GROUP BY` list. `GROUPING_ID ()` returns an **integer** bitmap whose lowest N bits may be lit. A lit **bit** indicates the corresponding argument is not a grouping column for the given output row. The lowest-order **bit** corresponds to argument N, and the N-1th lowest-order **bit** corresponds to argument 1. If the column is a grouping column the bit is 0 else is 1. - -For example: - -``` -mysql> select * from t; -+------+------+------+ -| k1 | k2 | k3 | -+------+------+------+ -| a | A | 1 | -| a | A | 2 | -| a | B | 1 | -| a | B | 3 | -| b | A | 1 | -| b | A | 4 | -| b | B | 1 | -| b | B | 5 | -+------+------+------+ -``` - -grouping sets result: - -``` -mysql> SELECT k1, k2, GROUPING(k1), GROUPING(k2), SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ); -+------+------+----------------+----------------+-----------+ -| k1 | k2 | grouping(`k1`) | grouping(`k2`) | sum(`k3`) | -+------+------+----------------+----------------+-----------+ -| a | A | 0 | 0 | 3 | -| a | B | 0 | 0 | 4 | -| a | NULL | 0 | 1 | 7 | -| b | A | 0 | 0 | 5 | -| b | B | 0 | 0 | 6 | -| b | NULL | 0 | 1 | 11 | -| NULL | A | 1 | 0 | 8 | -| NULL | B | 1 | 0 | 10 | -| NULL | NULL | 1 | 1 | 18 | -+------+------+----------------+----------------+-----------+ -9 rows in set (0.02 sec) - -mysql> SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ); -+------+------+-------------------------+-----------+ -| k1 | k2 | grouping_id(`k1`, `k2`) | sum(`k3`) | -+------+------+-------------------------+-----------+ -| a | A | 0 | 3 | -| a | B | 0 | 4 | -| a | NULL | 1 | 7 | -| b | A | 0 | 5 | -| b | B | 0 | 6 | -| b | NULL | 1 | 11 | -| NULL | A | 2 | 8 | -| NULL | B | 2 | 10 | -| NULL | NULL | 3 | 18 | -+------+------+-------------------------+-----------+ -9 rows in set (0.02 sec) - -mysql> SELECT k1, k2, grouping(k1), grouping(k2), GROUPING_ID(k1,k2), SUM(k4) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ) order by k1, k2; -+------+------+----------------+----------------+-------------------------+-----------+ -| k1 | k2 | grouping(`k1`) | grouping(`k2`) | grouping_id(`k1`, `k2`) | sum(`k4`) | -+------+------+----------------+----------------+-------------------------+-----------+ -| a | A | 0 | 0 | 0 | 3 | -| a | B | 0 | 0 | 0 | 4 | -| a | NULL | 0 | 1 | 1 | 7 | -| b | A | 0 | 0 | 0 | 5 | -| b | B | 0 | 0 | 0 | 6 | -| b | NULL | 0 | 1 | 1 | 11 | -| NULL | A | 1 | 0 | 2 | 8 | -| NULL | B | 1 | 0 | 2 | 10 | -| NULL | NULL | 1 | 1 | 3 | 18 | -+------+------+----------------+----------------+-------------------------+-----------+ -9 rows in set (0.02 sec) - -``` -### 1.5 Composition and nesting of GROUPING SETS - -First of all, a GROUP BY clause is essentially a special case of GROUPING SETS, for example: - -``` - GROUP BY a -is equivalent to: - GROUP BY GROUPING SETS((a)) -also, - GROUP BY a,b,c -is equivalent to: - GROUP BY GROUPING SETS((a,b,c)) -``` - -Similarly, CUBE and ROLLUP can be expanded into GROUPING SETS, so the various combinations and nesting of GROUP BY, CUBE, ROLLUP, GROUPING SETS are essentially the combination and nesting of GROUPING SETS. 
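Because `ROLLUP` and `CUBE` are shorthand for fixed lists of grouping sets, their expansion can be written down mechanically. The helpers below are a sketch for illustration only (the function names are invented), mirroring the equivalences given in sections 1.2 and 1.3:

```
from itertools import combinations

def rollup(cols):
    """ROLLUP(a, b, c) -> (a, b, c), (a, b), (a,), ()"""
    return [tuple(cols[:i]) for i in range(len(cols), -1, -1)]

def cube(cols):
    """CUBE(a, b, c) -> every subset of (a, b, c), including the empty set."""
    sets = []
    for r in range(len(cols), -1, -1):
        sets.extend(combinations(cols, r))
    return sets

print(rollup(["a", "b", "c"]))
# [('a', 'b', 'c'), ('a', 'b'), ('a',), ()]
print(cube(["a", "b", "c"]))
# [('a', 'b', 'c'), ('a', 'b'), ('a', 'c'), ('b', 'c'), ('a',), ('b',), ('c',), ()]
```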
- -For GROUPING SETS nesting, it is semantically equivalent to writing the statements inside the nest directly outside. (ref:) mentions: - -``` -The CUBE and ROLLUP constructs can be used either directly in the GROUP BY clause, or nested inside a GROUPING SETS clause. If one GROUPING SETS clause is nested inside another, the effect is the same as if all the elements of the inner clause had been written directly in the outer clause. -``` - -For a combined list of multiple GROUPING SETS, many databases consider it a cross product relationship. - -for example: - -``` -GROUP BY a, CUBE (b, c), GROUPING SETS ((d), (e)) - -is equivalent to: - -GROUP BY GROUPING SETS ( -(a, b, c, d), (a, b, c, e), -(a, b, d), (a, b, e), -(a, c, d), (a, c, e), -(a, d), (a, e) -) -``` - -For the combination and nesting of GROUPING SETS, each database support is not the same. For example snowflake does not support any combination and nesting. -() - -Oracle supports both composition and nesting. -() - -Presto supports composition, but not nesting. -() - -## 2. Object - -Support `GROUPING SETS`, `ROLLUP` and `CUBE ` syntax, implements 1.1, 1.2, 1.3 1.4, 1.5, not support the combination - and nesting of GROUPING SETS in current version. - -### 2.1 GROUPING SETS Syntax - -``` -SELECT ... -FROM ... -[ ... ] -GROUP BY GROUPING SETS ( groupSet [ , groupSet [ , ... ] ] ) -[ ... ] - -groupSet ::= { ( expr [ , expr [ , ... ] ] )} - - -Expression, column name. -``` - -### 2.2 ROLLUP Syntax - -``` -SELECT ... -FROM ... -[ ... ] -GROUP BY ROLLUP ( expr [ , expr [ , ... ] ] ) -[ ... ] - - -Expression, column name. -``` - -### 2.3 CUBE Syntax - -``` -SELECT ... -FROM ... -[ ... ] -GROUP BY CUBE ( expr [ , expr [ , ... ] ] ) -[ ... ] - - -Expression, column name. -``` - -## 3. Implementation - -### 3.1 Overall Design Approaches - -For `GROUPING SET` is equivalent to the `UNION` of `GROUP BY` . So we can expand input rows, and run an GROUP BY on these rows. - -For example: - -``` -SELECT a, b FROM src GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ()); -``` - -Data in table src: - -``` -1, 2 -3, 4 -``` - -Base on GROUPING SETS , we can expend the input to: - -``` -1, 2 (GROUPING_ID: a, b -> 00 -> 0) -1, null (GROUPING_ID: a, null -> 01 -> 1) -null, 2 (GROUPING_ID: null, b -> 10 -> 2) -null, null (GROUPING_ID: null, null -> 11 -> 3) - -3, 4 (GROUPING_ID: a, b -> 00 -> 0) -3, null (GROUPING_ID: a, null -> 01 -> 1) -null, 4 (GROUPING_ID: null, b -> 10 -> 2) -null, null (GROUPING_ID: null, null -> 11 -> 3) -``` - -And then use those row as input, then GROUP BY a, b, GROUPING_ID - -### 3.2 Example - -Table t: - -``` -mysql> select * from t; -+------+------+------+ -| k1 | k2 | k3 | -+------+------+------+ -| a | A | 1 | -| a | A | 2 | -| a | B | 1 | -| a | B | 3 | -| b | A | 1 | -| b | A | 4 | -| b | B | 1 | -| b | B | 5 | -+------+------+------+ -8 rows in set (0.01 sec) -``` - -for the query: - -``` -SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ((k1, k2), (k1), (k2), ()); -``` - -First, expand the input, every row expand into 4 rows ( the size of GROUPING SETS), and insert GROUPING_ID column - -e.g. 
a, A, 1 expanded to: - -``` -+------+------+------+-------------------------+ -| k1 | k2 | k3 | GROUPING_ID(`k1`, `k2`) | -+------+------+------+-------------------------+ -| a | A | 1 | 0 | -| a | NULL | 1 | 1 | -| NULL | A | 1 | 2 | -| NULL | NULL | 1 | 3 | -+------+------+------+-------------------------+ -``` - -Finally, all rows expended as follows (32 rows): - -``` -+------+------+------+-------------------------+ -| k1 | k2 | k3 | GROUPING_ID(`k1`, `k2`) | -+------+------+------+-------------------------+ -| a | A | 1 | 0 | -| a | A | 2 | 0 | -| a | B | 1 | 0 | -| a | B | 3 | 0 | -| b | A | 1 | 0 | -| b | A | 4 | 0 | -| b | B | 1 | 0 | -| b | B | 5 | 0 | -| a | NULL | 1 | 1 | -| a | NULL | 1 | 1 | -| a | NULL | 2 | 1 | -| a | NULL | 3 | 1 | -| b | NULL | 1 | 1 | -| b | NULL | 1 | 1 | -| b | NULL | 4 | 1 | -| b | NULL | 5 | 1 | -| NULL | A | 1 | 2 | -| NULL | A | 1 | 2 | -| NULL | A | 2 | 2 | -| NULL | A | 4 | 2 | -| NULL | B | 1 | 2 | -| NULL | B | 1 | 2 | -| NULL | B | 3 | 2 | -| NULL | B | 5 | 2 | -| NULL | NULL | 1 | 3 | -| NULL | NULL | 1 | 3 | -| NULL | NULL | 1 | 3 | -| NULL | NULL | 1 | 3 | -| NULL | NULL | 2 | 3 | -| NULL | NULL | 3 | 3 | -| NULL | NULL | 4 | 3 | -| NULL | NULL | 5 | 3 | -+------+------+------+-------------------------+ -32 rows in set. -``` - -now GROUP BY k1, k2, GROUPING_ID(k1,k2): - -``` -+------+------+-------------------------+-----------+ -| k1 | k2 | grouping_id(`k1`, `k2`) | sum(`k3`) | -+------+------+-------------------------+-----------+ -| a | A | 0 | 3 | -| a | B | 0 | 4 | -| a | NULL | 1 | 7 | -| b | A | 0 | 5 | -| b | B | 0 | 6 | -| b | NULL | 1 | 11 | -| NULL | A | 2 | 8 | -| NULL | B | 2 | 10 | -| NULL | NULL | 3 | 18 | -+------+------+-------------------------+-----------+ -9 rows in set (0.02 sec) -``` - -The result is equivalent to the UNION ALL - -``` -select k1, k2, sum(k3) from t group by k1, k2 -UNION ALL -select NULL, k2, sum(k3) from t group by k2 -UNION ALL -select k1, NULL, sum(k3) from t group by k1 -UNION ALL -select NULL, NULL, sum(k3) from t; - -+------+------+-----------+ -| k1 | k2 | sum(`k3`) | -+------+------+-----------+ -| b | B | 6 | -| b | A | 5 | -| a | A | 3 | -| a | B | 4 | -| a | NULL | 7 | -| b | NULL | 11 | -| NULL | B | 10 | -| NULL | A | 8 | -| NULL | NULL | 18 | -+------+------+-----------+ -9 rows in set (0.06 sec) -``` - -### 3.3 FE - -#### 3.3.1 Tasks - -1. Add GroupByClause, replace groupingExprs. -2. Add Grouping Sets, Cube and RollUp syntax. -3. Add GroupByClause in SelectStmt. -4. Add GroupingFunctionCallExpr, implements grouping grouping_id function call -5. Add VirtualSlot, generate the map of virtual slots and real slots -6. add virtual column GROUPING_ID and other virtual columns generated by grouping and grouping_id, insert into groupingExprs, -7. Add a PlanNode, name as RepeatNode. For GroupingSets aggregation insert RepeatNode to the plan. - -#### 3.3.2 Tuple - -In order to add GROUPING_ID to groupingExprs in GroupByClause, need to create virtual SlotRef, also, need tot create a tuple for this slot, named GROUPING\_\_ID Tuple. - -For the plannode RepeatNode, its input are all the tuples of its children and its output tuple are the repeat data and GROUPING_ID. - - -#### 3.3.3 Expression and Function Substitution - -expr -> if(bitand(pos, grouping_id)=0, expr, null) for expr in extension grouping clause -grouping_id() -> grouping_id(grouping_id) for grouping_id function - -### 3.4 BE - -#### 3.4.1 Tasks - -1. Add RepeatNode executor, expend the input data and append GROUPING_ID to every row -2. 
Implements grouping_id() and grouping() functions.
diff --git a/docs/en/internal/metadata-design.md b/docs/en/internal/metadata-design.md
deleted file mode 100644
index 43a2fa89b1..0000000000
--- a/docs/en/internal/metadata-design.md
+++ /dev/null
@@ -1,127 +0,0 @@
----
-{
-    "title": "Metadata Design Document",
-    "language": "en"
-}
----
-
-# Metadata Design Document
-
-## Glossary
-
-* FE: Frontend, the front-end node of Doris. It is mainly responsible for receiving and returning client requests, storing metadata, managing the cluster, generating query plans, and so on.
-* BE: Backend, the back-end node of Doris. It is mainly responsible for data storage and management, query plan execution, and other work.
-* bdbje: [Oracle Berkeley DB Java Edition](http://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html). Doris uses bdbje to persist the metadata operation log and to provide high availability for FE.
-
-## Overall architecture
-![](/images/palo_architecture.jpg)
-
-As shown above, Doris's overall architecture is divided into two layers. Multiple FEs form the first layer, providing horizontal scaling and high availability of FE. Multiple BEs form the second layer, which is responsible for data storage and management. This document mainly introduces the design and implementation of metadata in the FE layer.
-
-1. There are two kinds of FE nodes: follower and observer. Leader election and data synchronization among FE nodes are handled by bdbje ([BerkeleyDB Java Edition](http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/overview/index-093405.html)).
-
-2. One of the followers is elected as the leader node, which is responsible for writing metadata. When the leader node goes down, the other follower nodes re-elect a leader to ensure high availability of the service.
-
-3. The observer node only synchronizes metadata from the leader node and does not participate in elections. It can be scaled horizontally to extend the capacity of the metadata read service.
-
-> Note: The bdbje concepts corresponding to follower and observer are replica and observer. Both names may be used below.
-
-## Metadata structure
-
-Doris's metadata is held entirely in memory. A complete metadata image is maintained in the memory of each FE. Within Baidu, a cluster with 2,500 tables and 1 million tablets (3 million replicas) occupies only about 2 GB of metadata memory. (The memory overhead of intermediate query objects and various job information still needs to be estimated for the actual workload; even so, the overall memory overhead remains low.)
-
-Metadata is stored in memory as a whole, in a tree-like hierarchical structure. With the help of auxiliary structures, metadata at every level can be accessed quickly.
-
-The following figure shows the contents stored in Doris metadata.
-
-![](/images/metadata_contents.png)
-
-As shown above, Doris's metadata mainly stores four kinds of data:
-
-1. User data information, including databases, table schemas, tablet information, etc.
-2. All kinds of job information, such as import jobs, Clone jobs, SchemaChange jobs, etc.
-3. User and permission information.
-4. Cluster and node information.
-
-## Data stream
-
-![](/images/metadata_stream.png)
-
-The data flow of metadata is as follows:
-
-1. Only the leader FE can write metadata. After modifying the leader's memory, the write operation is serialized into a log and written to bdbje as a key-value pair. The key is a monotonically increasing integer used as the log id; the value is the serialized operation log.
-
-2. After the log is written to bdbje, bdbje copies it to the other non-leader FE nodes according to the replication policy (write majority / write all). The non-leader FE nodes replay the log to modify their own in-memory metadata images and thereby synchronize with the leader's metadata.
-
-3. When the number of log entries on the leader node reaches a threshold (100,000 by default), the checkpoint thread starts. The checkpoint reads the existing image file plus the subsequent logs and replays a new copy of the metadata image in memory; this copy is then written to disk to form a new image. The reason for regenerating a new copy instead of dumping the existing in-memory image is that dumping the image requires holding a read lock, which would block write operations for the duration of the dump. As a consequence, each checkpoint temporarily occupies twice the memory.
-
-4. After the image file is generated, the leader node notifies the other non-leader nodes that a new image is available. The non-leader nodes pull the latest image file over HTTP and replace their old local files.
-
-5. The logs in bdbje are deleted periodically once the image is completed.
-
-## Implementation details
-
-### Metadata directory
-
-1. The metadata directory is specified by the FE configuration item `meta_dir`.
-
-2. The bdbje data is stored under the `bdb/` subdirectory.
-
-3. Image files are stored under the `image/` subdirectory.
-
-* `image.[logid]` is the latest image file. The suffix `logid` indicates the id of the last log entry contained in the image.
-* `image.ckpt` is the image file currently being written. If it is written successfully, it is renamed to `image.[logid]` and replaces the old image file.
-* The `cluster_id` is recorded in the `VERSION` file. It uniquely identifies a Doris cluster and is a 32-bit integer randomly generated at the first startup of the leader. A cluster id can also be specified through the FE configuration item `cluster_id`.
-* The role of the FE itself is recorded in the `ROLE` file. The only values are `FOLLOWER` and `OBSERVER`, where `FOLLOWER` denotes an electable node. (Note: even the leader node has the role `FOLLOWER`.)
-
-### Start-up process
-
-1. FE starts for the first time and the startup script is given no extra parameters: it tries to start as the leader. You will eventually see `transfer from UNKNOWN to MASTER` in the FE startup log.
-
-2. FE starts for the first time and the `-helper` parameter is specified in the startup script, pointing to a correct leader FE node: the FE first asks the leader node about its own role (ROLE) and cluster_id over HTTP, then pulls the latest image file. After reading the image file and generating the metadata image, it starts bdbje and begins bdbje log synchronization. Once synchronization is completed, the logs in bdbje that come after the image are replayed, and the final metadata image is complete.
-
-   > Note 1: When starting with the `-helper` parameter, you must first add the FE on the leader via a MySQL command; otherwise startup will report an error.
-
-   > Note 2: `-helper` can point to any follower node, even if it is not the leader.
-
-   > Note 3: While logs are being synchronized, the FE log will show `xxx detached`. This means the log pull is in progress and is normal.
-
-3. FE is not starting for the first time and the startup script is given no extra parameters: it determines its identity from the locally stored ROLE information, and obtains the leader information from the cluster information stored in the local bdbje. It then reads the local image file and the logs in bdbje to complete the metadata image generation. (If the role recorded in the local ROLE file is inconsistent with the one recorded in bdbje, an error is reported.)
-
-4. FE is not starting for the first time and the `-helper` parameter is specified in the startup script: just as in the first-start case, it first asks the leader for its role, but compares it with the locally stored ROLE. If they are inconsistent, an error is reported.
-
-#### Metadata Read-Write and Synchronization
-
-1. Users can connect to any FE node via MySQL to read and write metadata. If the connected node is a non-leader, it forwards write operations to the leader node. When the leader has written successfully, it returns its current, up-to-date log id. The non-leader node then waits until the log id it has replayed is greater than or equal to the returned log id before reporting success to the client. This approach guarantees Read-Your-Write semantics on any FE node.
-
-   > Note: Some non-write operations are also forwarded to the leader for execution, for example `SHOW LOAD`. These commands usually need to read the intermediate state of some jobs, and that state is not written to bdbje, so it does not exist in the memory of non-leader nodes. (Metadata synchronization between FEs depends entirely on bdbje log replay. If a metadata modification does not write a bdbje log, its result will not be visible on other non-leader nodes.)
-
-2. The leader node starts a TimePrinter thread, which periodically writes a key-value entry containing the current time to bdbje. The non-leader nodes read the recorded time while replaying the log and compare it with their local time. If the lag between the recorded time and the local time exceeds the specified threshold (configuration item `meta_delay_toleration_second`; the write interval is half of this value), the node enters the **unreadable** state. This mechanism prevents a non-leader node from serving stale metadata long after it has been disconnected from the leader.
-
-3. The metadata of the FEs only guarantees eventual consistency. Normally the inconsistency window is only a few milliseconds. Monotonic consistency of metadata access is guaranteed within a single session, but if the same client connects to different FEs, metadata regression may occur. (For batch update systems this has little impact.)
-
-### Downtime recovery
-
-1. When the leader node goes down, the remaining followers immediately elect a new leader node to provide service.
-2. Metadata cannot be written when a majority of follower nodes are down. While metadata is not writable, the current behavior on receiving a write request is that the **FE process exits**. This logic will be optimized in the future so that read service is still provided in the non-writable state.
-3. The downtime of an observer node does not affect the state of any other node, nor does it affect metadata reads and writes on other nodes.
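The Read-Your-Write behaviour described in item 1 of "Metadata Read-Write and Synchronization" above can be pictured with a minimal sketch: the non-leader FE forwards the write, remembers the log id the leader returns, and only acknowledges the client once its own replay has caught up. This is an illustrative sketch only, not Doris FE source code; the class name, the `forwardToLeader()` helper, and the fixed log id 42 are assumptions made up for the example.

```java
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

public class ReadYourWriteSketch {

    /** Log id of the last operation this (non-leader) node has replayed locally. */
    private final AtomicLong replayedLogId = new AtomicLong(0);

    /** Stand-in for forwarding the write to the leader; returns the leader's latest log id. */
    private long forwardToLeader(String writeOp) {
        // In the real system this would be an RPC; here we pretend the leader
        // assigned log id 42 to this operation.
        return 42L;
    }

    /** Forward a write to the leader, then wait until local replay catches up. */
    public void executeWrite(String writeOp, long timeoutMs) throws InterruptedException {
        long leaderLogId = forwardToLeader(writeOp);
        long deadline = System.currentTimeMillis() + timeoutMs;
        // Block until this node has replayed at least the leader's log id, so a
        // follow-up read on this node sees the write (read-your-write semantics).
        while (replayedLogId.get() < leaderLogId) {
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("metadata replay lagging behind leader");
            }
            TimeUnit.MILLISECONDS.sleep(10);
        }
        // Only now is success reported to the client.
    }

    /** Called by the (simulated) replay thread as logs are applied locally. */
    public void onLogReplayed(long logId) {
        replayedLogId.accumulateAndGet(logId, Math::max);
    }

    public static void main(String[] args) throws InterruptedException {
        ReadYourWriteSketch fe = new ReadYourWriteSketch();
        // Simulate the replay thread catching up shortly after the write is forwarded.
        new Thread(() -> {
            try { TimeUnit.MILLISECONDS.sleep(50); } catch (InterruptedException ignored) { }
            fe.onLogReplayed(42L);
        }).start();
        fe.executeWrite("CREATE TABLE t ...", 1000);
        System.out.println("write acknowledged after local replay caught up");
    }
}
```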
diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/approx_count_distinct.md b/docs/en/sql-manual/sql-functions/aggregate-functions/approx_count_distinct.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/approx_count_distinct.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/approx_count_distinct.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/avg.md b/docs/en/sql-manual/sql-functions/aggregate-functions/avg.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/avg.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/avg.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/bitmap_union.md b/docs/en/sql-manual/sql-functions/aggregate-functions/bitmap_union.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/bitmap_union.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/bitmap_union.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/count.md b/docs/en/sql-manual/sql-functions/aggregate-functions/count.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/count.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/count.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/group_concat.md b/docs/en/sql-manual/sql-functions/aggregate-functions/group_concat.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/group_concat.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/group_concat.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md b/docs/en/sql-manual/sql-functions/aggregate-functions/hll_union_agg.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/hll_union_agg.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/max.md b/docs/en/sql-manual/sql-functions/aggregate-functions/max.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/max.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/max.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/max_by.md b/docs/en/sql-manual/sql-functions/aggregate-functions/max_by.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/max_by.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/max_by.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/min.md b/docs/en/sql-manual/sql-functions/aggregate-functions/min.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/min.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/min.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/min_by.md b/docs/en/sql-manual/sql-functions/aggregate-functions/min_by.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/min_by.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/min_by.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/percentile.md b/docs/en/sql-manual/sql-functions/aggregate-functions/percentile.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/percentile.md rename to 
docs/en/sql-manual/sql-functions/aggregate-functions/percentile.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/percentile_approx.md b/docs/en/sql-manual/sql-functions/aggregate-functions/percentile_approx.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/percentile_approx.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/percentile_approx.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/stddev.md b/docs/en/sql-manual/sql-functions/aggregate-functions/stddev.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/stddev.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/stddev.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/stddev_samp.md b/docs/en/sql-manual/sql-functions/aggregate-functions/stddev_samp.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/stddev_samp.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/stddev_samp.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/sum.md b/docs/en/sql-manual/sql-functions/aggregate-functions/sum.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/sum.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/sum.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/topn.md b/docs/en/sql-manual/sql-functions/aggregate-functions/topn.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/topn.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/topn.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/var_samp.md b/docs/en/sql-manual/sql-functions/aggregate-functions/var_samp.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/var_samp.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/var_samp.md diff --git a/docs/en/sql-reference/sql-functions/aggregate-functions/variance.md b/docs/en/sql-manual/sql-functions/aggregate-functions/variance.md similarity index 100% rename from docs/en/sql-reference/sql-functions/aggregate-functions/variance.md rename to docs/en/sql-manual/sql-functions/aggregate-functions/variance.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_count.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_count.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_count.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_not.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_not.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_not_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not_count.md similarity index 100% rename from 
docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_and_not_count.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_and_not_count.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_contains.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_contains.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_contains.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_contains.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_empty.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_empty.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_empty.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_empty.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_from_string.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_from_string.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_from_string.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_from_string.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_has_all.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_all.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_has_all.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_all.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_has_any.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_any.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_has_any.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_has_any.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_hash.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_hash.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_hash.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_hash.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_intersect.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_intersect.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_max.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_max.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_max.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_max.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_min.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_min.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_min.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_min.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_not.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_not.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_not.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_not.md diff --git 
a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_or.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_or.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_or_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or_count.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_or_count.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_or_count.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_in_range.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_in_range.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_limit.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_subset_limit.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_to_string.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_to_string.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_to_string.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_to_string.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_union.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_union.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_xor.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_xor.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_xor_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor_count.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_xor_count.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/bitmap_xor_count.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md b/docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md diff --git 
a/docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md b/docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/sub_bitmap.md b/docs/en/sql-manual/sql-functions/bitmap-functions/sub_bitmap.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/sub_bitmap.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/sub_bitmap.md diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/to_bitmap.md b/docs/en/sql-manual/sql-functions/bitmap-functions/to_bitmap.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitmap-functions/to_bitmap.md rename to docs/en/sql-manual/sql-functions/bitmap-functions/to_bitmap.md diff --git a/docs/en/sql-reference/sql-functions/bitwise-functions/bitand.md b/docs/en/sql-manual/sql-functions/bitwise-functions/bitand.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitwise-functions/bitand.md rename to docs/en/sql-manual/sql-functions/bitwise-functions/bitand.md diff --git a/docs/en/sql-reference/sql-functions/bitwise-functions/bitnot.md b/docs/en/sql-manual/sql-functions/bitwise-functions/bitnot.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitwise-functions/bitnot.md rename to docs/en/sql-manual/sql-functions/bitwise-functions/bitnot.md diff --git a/docs/en/sql-reference/sql-functions/bitwise-functions/bitor.md b/docs/en/sql-manual/sql-functions/bitwise-functions/bitor.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitwise-functions/bitor.md rename to docs/en/sql-manual/sql-functions/bitwise-functions/bitor.md diff --git a/docs/en/sql-reference/sql-functions/bitwise-functions/bitxor.md b/docs/en/sql-manual/sql-functions/bitwise-functions/bitxor.md similarity index 100% rename from docs/en/sql-reference/sql-functions/bitwise-functions/bitxor.md rename to docs/en/sql-manual/sql-functions/bitwise-functions/bitxor.md diff --git a/docs/en/sql-reference/sql-functions/cast.md b/docs/en/sql-manual/sql-functions/cast.md similarity index 100% rename from docs/en/sql-reference/sql-functions/cast.md rename to docs/en/sql-manual/sql-functions/cast.md diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/case.md b/docs/en/sql-manual/sql-functions/conditional-functions/case.md similarity index 100% rename from docs/en/sql-reference/sql-functions/conditional-functions/case.md rename to docs/en/sql-manual/sql-functions/conditional-functions/case.md diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/coalesce.md b/docs/en/sql-manual/sql-functions/conditional-functions/coalesce.md similarity index 100% rename from docs/en/sql-reference/sql-functions/conditional-functions/coalesce.md rename to docs/en/sql-manual/sql-functions/conditional-functions/coalesce.md diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/if.md b/docs/en/sql-manual/sql-functions/conditional-functions/if.md similarity index 100% rename from docs/en/sql-reference/sql-functions/conditional-functions/if.md rename to docs/en/sql-manual/sql-functions/conditional-functions/if.md diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/ifnull.md 
b/docs/en/sql-manual/sql-functions/conditional-functions/ifnull.md similarity index 100% rename from docs/en/sql-reference/sql-functions/conditional-functions/ifnull.md rename to docs/en/sql-manual/sql-functions/conditional-functions/ifnull.md diff --git a/docs/en/sql-reference/sql-functions/conditional-functions/nullif.md b/docs/en/sql-manual/sql-functions/conditional-functions/nullif.md similarity index 100% rename from docs/en/sql-reference/sql-functions/conditional-functions/nullif.md rename to docs/en/sql-manual/sql-functions/conditional-functions/nullif.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/convert_tz.md b/docs/en/sql-manual/sql-functions/date-time-functions/convert_tz.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/convert_tz.md rename to docs/en/sql-manual/sql-functions/date-time-functions/convert_tz.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/curdate.md b/docs/en/sql-manual/sql-functions/date-time-functions/curdate.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/curdate.md rename to docs/en/sql-manual/sql-functions/date-time-functions/curdate.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/current_timestamp.md b/docs/en/sql-manual/sql-functions/date-time-functions/current_timestamp.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/current_timestamp.md rename to docs/en/sql-manual/sql-functions/date-time-functions/current_timestamp.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/curtime.md b/docs/en/sql-manual/sql-functions/date-time-functions/curtime.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/curtime.md rename to docs/en/sql-manual/sql-functions/date-time-functions/curtime.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/date_add.md b/docs/en/sql-manual/sql-functions/date-time-functions/date_add.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/date_add.md rename to docs/en/sql-manual/sql-functions/date-time-functions/date_add.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/date_format.md b/docs/en/sql-manual/sql-functions/date-time-functions/date_format.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/date_format.md rename to docs/en/sql-manual/sql-functions/date-time-functions/date_format.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/date_sub.md b/docs/en/sql-manual/sql-functions/date-time-functions/date_sub.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/date_sub.md rename to docs/en/sql-manual/sql-functions/date-time-functions/date_sub.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/datediff.md b/docs/en/sql-manual/sql-functions/date-time-functions/datediff.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/datediff.md rename to docs/en/sql-manual/sql-functions/date-time-functions/datediff.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/day.md b/docs/en/sql-manual/sql-functions/date-time-functions/day.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/day.md rename to docs/en/sql-manual/sql-functions/date-time-functions/day.md diff --git 
a/docs/en/sql-reference/sql-functions/date-time-functions/dayname.md b/docs/en/sql-manual/sql-functions/date-time-functions/dayname.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/dayname.md rename to docs/en/sql-manual/sql-functions/date-time-functions/dayname.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/dayofmonth.md b/docs/en/sql-manual/sql-functions/date-time-functions/dayofmonth.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/dayofmonth.md rename to docs/en/sql-manual/sql-functions/date-time-functions/dayofmonth.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/dayofweek.md b/docs/en/sql-manual/sql-functions/date-time-functions/dayofweek.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/dayofweek.md rename to docs/en/sql-manual/sql-functions/date-time-functions/dayofweek.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/dayofyear.md b/docs/en/sql-manual/sql-functions/date-time-functions/dayofyear.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/dayofyear.md rename to docs/en/sql-manual/sql-functions/date-time-functions/dayofyear.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/from_days.md b/docs/en/sql-manual/sql-functions/date-time-functions/from_days.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/from_days.md rename to docs/en/sql-manual/sql-functions/date-time-functions/from_days.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/from_unixtime.md b/docs/en/sql-manual/sql-functions/date-time-functions/from_unixtime.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/from_unixtime.md rename to docs/en/sql-manual/sql-functions/date-time-functions/from_unixtime.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/hour.md b/docs/en/sql-manual/sql-functions/date-time-functions/hour.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/hour.md rename to docs/en/sql-manual/sql-functions/date-time-functions/hour.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/makedate.md b/docs/en/sql-manual/sql-functions/date-time-functions/makedate.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/makedate.md rename to docs/en/sql-manual/sql-functions/date-time-functions/makedate.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/minute.md b/docs/en/sql-manual/sql-functions/date-time-functions/minute.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/minute.md rename to docs/en/sql-manual/sql-functions/date-time-functions/minute.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/month.md b/docs/en/sql-manual/sql-functions/date-time-functions/month.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/month.md rename to docs/en/sql-manual/sql-functions/date-time-functions/month.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/monthname.md b/docs/en/sql-manual/sql-functions/date-time-functions/monthname.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/monthname.md rename to 
docs/en/sql-manual/sql-functions/date-time-functions/monthname.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/now.md b/docs/en/sql-manual/sql-functions/date-time-functions/now.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/now.md rename to docs/en/sql-manual/sql-functions/date-time-functions/now.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/second.md b/docs/en/sql-manual/sql-functions/date-time-functions/second.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/second.md rename to docs/en/sql-manual/sql-functions/date-time-functions/second.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/str_to_date.md b/docs/en/sql-manual/sql-functions/date-time-functions/str_to_date.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/str_to_date.md rename to docs/en/sql-manual/sql-functions/date-time-functions/str_to_date.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/time_round.md b/docs/en/sql-manual/sql-functions/date-time-functions/time_round.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/time_round.md rename to docs/en/sql-manual/sql-functions/date-time-functions/time_round.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/timediff.md b/docs/en/sql-manual/sql-functions/date-time-functions/timediff.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/timediff.md rename to docs/en/sql-manual/sql-functions/date-time-functions/timediff.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/timestampadd.md b/docs/en/sql-manual/sql-functions/date-time-functions/timestampadd.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/timestampadd.md rename to docs/en/sql-manual/sql-functions/date-time-functions/timestampadd.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/timestampdiff.md b/docs/en/sql-manual/sql-functions/date-time-functions/timestampdiff.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/timestampdiff.md rename to docs/en/sql-manual/sql-functions/date-time-functions/timestampdiff.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/to_date.md b/docs/en/sql-manual/sql-functions/date-time-functions/to_date.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/to_date.md rename to docs/en/sql-manual/sql-functions/date-time-functions/to_date.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/to_days.md b/docs/en/sql-manual/sql-functions/date-time-functions/to_days.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/to_days.md rename to docs/en/sql-manual/sql-functions/date-time-functions/to_days.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/unix_timestamp.md b/docs/en/sql-manual/sql-functions/date-time-functions/unix_timestamp.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/unix_timestamp.md rename to docs/en/sql-manual/sql-functions/date-time-functions/unix_timestamp.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/utc_timestamp.md b/docs/en/sql-manual/sql-functions/date-time-functions/utc_timestamp.md similarity index 100% rename from 
docs/en/sql-reference/sql-functions/date-time-functions/utc_timestamp.md rename to docs/en/sql-manual/sql-functions/date-time-functions/utc_timestamp.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/week.md b/docs/en/sql-manual/sql-functions/date-time-functions/week.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/week.md rename to docs/en/sql-manual/sql-functions/date-time-functions/week.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/weekday.md b/docs/en/sql-manual/sql-functions/date-time-functions/weekday.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/weekday.md rename to docs/en/sql-manual/sql-functions/date-time-functions/weekday.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/weekofyear.md b/docs/en/sql-manual/sql-functions/date-time-functions/weekofyear.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/weekofyear.md rename to docs/en/sql-manual/sql-functions/date-time-functions/weekofyear.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/year.md b/docs/en/sql-manual/sql-functions/date-time-functions/year.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/year.md rename to docs/en/sql-manual/sql-functions/date-time-functions/year.md diff --git a/docs/en/sql-reference/sql-functions/date-time-functions/yearweek.md b/docs/en/sql-manual/sql-functions/date-time-functions/yearweek.md similarity index 100% rename from docs/en/sql-reference/sql-functions/date-time-functions/yearweek.md rename to docs/en/sql-manual/sql-functions/date-time-functions/yearweek.md diff --git a/docs/en/sql-reference/sql-functions/digital-masking.md b/docs/en/sql-manual/sql-functions/digital-masking.md similarity index 100% rename from docs/en/sql-reference/sql-functions/digital-masking.md rename to docs/en/sql-manual/sql-functions/digital-masking.md diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/aes.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/aes.md similarity index 100% rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/aes.md rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/aes.md diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/md5.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5.md similarity index 100% rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/md5.md rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5.md diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/md5sum.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md similarity index 100% rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/md5sum.md rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm3.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3.md similarity index 100% rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm3.md rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3.md diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm3sum.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md similarity index 100% rename from 
docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm3sum.md rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md diff --git a/docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm4.md b/docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm4.md similarity index 100% rename from docs/en/sql-reference/sql-functions/encrypt-digest-functions/sm4.md rename to docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm4.md diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/aes.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/aes.md similarity index 100% rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/aes.md rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/aes.md diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/md5.md similarity index 100% rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5.md rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/md5.md diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/md5sum.md similarity index 100% rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/md5sum.md diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm3.md similarity index 100% rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3.md rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm3.md diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm3sum.md similarity index 100% rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm3sum.md diff --git a/new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm4.md b/docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm4.md similarity index 100% rename from new-docs/en/sql-manual/sql-functions/encrypt-digest-functions/sm4.md rename to docs/en/sql-manual/sql-functions/encrypt-dixgest-functions/sm4.md diff --git a/docs/en/sql-reference/sql-functions/hash-functions/murmur_hash3_32.md b/docs/en/sql-manual/sql-functions/hash-functions/murmur_hash3_32.md similarity index 100% rename from docs/en/sql-reference/sql-functions/hash-functions/murmur_hash3_32.md rename to docs/en/sql-manual/sql-functions/hash-functions/murmur_hash3_32.md diff --git a/docs/en/sql-reference/sql-functions/json-functions/get_json_double.md b/docs/en/sql-manual/sql-functions/json-functions/get_json_double.md similarity index 100% rename from docs/en/sql-reference/sql-functions/json-functions/get_json_double.md rename to docs/en/sql-manual/sql-functions/json-functions/get_json_double.md diff --git a/docs/en/sql-reference/sql-functions/json-functions/get_json_int.md b/docs/en/sql-manual/sql-functions/json-functions/get_json_int.md similarity index 100% rename from docs/en/sql-reference/sql-functions/json-functions/get_json_int.md rename to docs/en/sql-manual/sql-functions/json-functions/get_json_int.md diff --git a/docs/en/sql-reference/sql-functions/json-functions/get_json_string.md 
b/docs/en/sql-manual/sql-functions/json-functions/get_json_string.md similarity index 100% rename from docs/en/sql-reference/sql-functions/json-functions/get_json_string.md rename to docs/en/sql-manual/sql-functions/json-functions/get_json_string.md diff --git a/docs/en/sql-reference/sql-functions/json-functions/json_array.md b/docs/en/sql-manual/sql-functions/json-functions/json_array.md similarity index 100% rename from docs/en/sql-reference/sql-functions/json-functions/json_array.md rename to docs/en/sql-manual/sql-functions/json-functions/json_array.md diff --git a/docs/en/sql-reference/sql-functions/json-functions/json_object.md b/docs/en/sql-manual/sql-functions/json-functions/json_object.md similarity index 100% rename from docs/en/sql-reference/sql-functions/json-functions/json_object.md rename to docs/en/sql-manual/sql-functions/json-functions/json_object.md diff --git a/docs/en/sql-reference/sql-functions/json-functions/json_quote.md b/docs/en/sql-manual/sql-functions/json-functions/json_quote.md similarity index 100% rename from docs/en/sql-reference/sql-functions/json-functions/json_quote.md rename to docs/en/sql-manual/sql-functions/json-functions/json_quote.md diff --git a/docs/en/sql-reference/sql-functions/math-functions/conv.md b/docs/en/sql-manual/sql-functions/math-functions/conv.md similarity index 100% rename from docs/en/sql-reference/sql-functions/math-functions/conv.md rename to docs/en/sql-manual/sql-functions/math-functions/conv.md diff --git a/docs/en/sql-reference/sql-functions/math-functions/pmod.md b/docs/en/sql-manual/sql-functions/math-functions/pmod.md similarity index 100% rename from docs/en/sql-reference/sql-functions/math-functions/pmod.md rename to docs/en/sql-manual/sql-functions/math-functions/pmod.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_astext.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_astext.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_astext.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_astext.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_circle.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_circle.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_circle.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_circle.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_contains.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_contains.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_contains.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_contains.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_distance_sphere.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_distance_sphere.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_geometryfromtext.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_geometryfromtext.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_linefromtext.md 
b/docs/en/sql-manual/sql-functions/spatial-functions/st_linefromtext.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_linefromtext.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_linefromtext.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_point.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_point.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_point.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_point.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_polygon.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_polygon.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_polygon.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_polygon.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_x.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_x.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_x.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_x.md diff --git a/docs/en/sql-reference/sql-functions/spatial-functions/st_y.md b/docs/en/sql-manual/sql-functions/spatial-functions/st_y.md similarity index 100% rename from docs/en/sql-reference/sql-functions/spatial-functions/st_y.md rename to docs/en/sql-manual/sql-functions/spatial-functions/st_y.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/append_trailing_char_if_absent.md b/docs/en/sql-manual/sql-functions/string-functions/append_trailing_char_if_absent.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/append_trailing_char_if_absent.md rename to docs/en/sql-manual/sql-functions/string-functions/append_trailing_char_if_absent.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/ascii.md b/docs/en/sql-manual/sql-functions/string-functions/ascii.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/ascii.md rename to docs/en/sql-manual/sql-functions/string-functions/ascii.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/bit_length.md b/docs/en/sql-manual/sql-functions/string-functions/bit_length.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/bit_length.md rename to docs/en/sql-manual/sql-functions/string-functions/bit_length.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/char_length.md b/docs/en/sql-manual/sql-functions/string-functions/char_length.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/char_length.md rename to docs/en/sql-manual/sql-functions/string-functions/char_length.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/concat.md b/docs/en/sql-manual/sql-functions/string-functions/concat.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/concat.md rename to docs/en/sql-manual/sql-functions/string-functions/concat.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/concat_ws.md b/docs/en/sql-manual/sql-functions/string-functions/concat_ws.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/concat_ws.md rename to docs/en/sql-manual/sql-functions/string-functions/concat_ws.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/ends_with.md 
b/docs/en/sql-manual/sql-functions/string-functions/ends_with.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/ends_with.md rename to docs/en/sql-manual/sql-functions/string-functions/ends_with.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/find_in_set.md b/docs/en/sql-manual/sql-functions/string-functions/find_in_set.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/find_in_set.md rename to docs/en/sql-manual/sql-functions/string-functions/find_in_set.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/hex.md b/docs/en/sql-manual/sql-functions/string-functions/hex.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/hex.md rename to docs/en/sql-manual/sql-functions/string-functions/hex.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/instr.md b/docs/en/sql-manual/sql-functions/string-functions/instr.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/instr.md rename to docs/en/sql-manual/sql-functions/string-functions/instr.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/lcase.md b/docs/en/sql-manual/sql-functions/string-functions/lcase.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/lcase.md rename to docs/en/sql-manual/sql-functions/string-functions/lcase.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/left.md b/docs/en/sql-manual/sql-functions/string-functions/left.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/left.md rename to docs/en/sql-manual/sql-functions/string-functions/left.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/length.md b/docs/en/sql-manual/sql-functions/string-functions/length.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/length.md rename to docs/en/sql-manual/sql-functions/string-functions/length.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/like/like.md b/docs/en/sql-manual/sql-functions/string-functions/like/like.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/like/like.md rename to docs/en/sql-manual/sql-functions/string-functions/like/like.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/like/not_like.md b/docs/en/sql-manual/sql-functions/string-functions/like/not_like.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/like/not_like.md rename to docs/en/sql-manual/sql-functions/string-functions/like/not_like.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/locate.md b/docs/en/sql-manual/sql-functions/string-functions/locate.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/locate.md rename to docs/en/sql-manual/sql-functions/string-functions/locate.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/lower.md b/docs/en/sql-manual/sql-functions/string-functions/lower.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/lower.md rename to docs/en/sql-manual/sql-functions/string-functions/lower.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/lpad.md b/docs/en/sql-manual/sql-functions/string-functions/lpad.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/lpad.md rename to 
docs/en/sql-manual/sql-functions/string-functions/lpad.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/ltrim.md b/docs/en/sql-manual/sql-functions/string-functions/ltrim.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/ltrim.md rename to docs/en/sql-manual/sql-functions/string-functions/ltrim.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/money_format.md b/docs/en/sql-manual/sql-functions/string-functions/money_format.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/money_format.md rename to docs/en/sql-manual/sql-functions/string-functions/money_format.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/null_or_empty.md b/docs/en/sql-manual/sql-functions/string-functions/null_or_empty.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/null_or_empty.md rename to docs/en/sql-manual/sql-functions/string-functions/null_or_empty.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/regexp/not_regexp.md b/docs/en/sql-manual/sql-functions/string-functions/regexp/not_regexp.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/regexp/not_regexp.md rename to docs/en/sql-manual/sql-functions/string-functions/regexp/not_regexp.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/regexp/regexp.md b/docs/en/sql-manual/sql-functions/string-functions/regexp/regexp.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/regexp/regexp.md rename to docs/en/sql-manual/sql-functions/string-functions/regexp/regexp.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/regexp/regexp_extract.md b/docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/regexp/regexp_extract.md rename to docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/regexp/regexp_replace.md b/docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_replace.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/regexp/regexp_replace.md rename to docs/en/sql-manual/sql-functions/string-functions/regexp/regexp_replace.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/repeat.md b/docs/en/sql-manual/sql-functions/string-functions/repeat.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/repeat.md rename to docs/en/sql-manual/sql-functions/string-functions/repeat.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/replace.md b/docs/en/sql-manual/sql-functions/string-functions/replace.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/replace.md rename to docs/en/sql-manual/sql-functions/string-functions/replace.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/reverse.md b/docs/en/sql-manual/sql-functions/string-functions/reverse.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/reverse.md rename to docs/en/sql-manual/sql-functions/string-functions/reverse.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/right.md b/docs/en/sql-manual/sql-functions/string-functions/right.md similarity index 100% rename from 
docs/en/sql-reference/sql-functions/string-functions/right.md rename to docs/en/sql-manual/sql-functions/string-functions/right.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/rpad.md b/docs/en/sql-manual/sql-functions/string-functions/rpad.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/rpad.md rename to docs/en/sql-manual/sql-functions/string-functions/rpad.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/split_part.md b/docs/en/sql-manual/sql-functions/string-functions/split_part.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/split_part.md rename to docs/en/sql-manual/sql-functions/string-functions/split_part.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/starts_with.md b/docs/en/sql-manual/sql-functions/string-functions/starts_with.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/starts_with.md rename to docs/en/sql-manual/sql-functions/string-functions/starts_with.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/strleft.md b/docs/en/sql-manual/sql-functions/string-functions/strleft.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/strleft.md rename to docs/en/sql-manual/sql-functions/string-functions/strleft.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/strright.md b/docs/en/sql-manual/sql-functions/string-functions/strright.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/strright.md rename to docs/en/sql-manual/sql-functions/string-functions/strright.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/substring.md b/docs/en/sql-manual/sql-functions/string-functions/substring.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/substring.md rename to docs/en/sql-manual/sql-functions/string-functions/substring.md diff --git a/docs/en/sql-reference/sql-functions/string-functions/unhex.md b/docs/en/sql-manual/sql-functions/string-functions/unhex.md similarity index 100% rename from docs/en/sql-reference/sql-functions/string-functions/unhex.md rename to docs/en/sql-manual/sql-functions/string-functions/unhex.md diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-bitmap.md b/docs/en/sql-manual/sql-functions/table-functions/explode-bitmap.md similarity index 100% rename from docs/en/sql-reference/sql-functions/table-functions/explode-bitmap.md rename to docs/en/sql-manual/sql-functions/table-functions/explode-bitmap.md diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-json-array.md b/docs/en/sql-manual/sql-functions/table-functions/explode-json-array.md similarity index 100% rename from docs/en/sql-reference/sql-functions/table-functions/explode-json-array.md rename to docs/en/sql-manual/sql-functions/table-functions/explode-json-array.md diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-numbers.md b/docs/en/sql-manual/sql-functions/table-functions/explode-numbers.md similarity index 100% rename from docs/en/sql-reference/sql-functions/table-functions/explode-numbers.md rename to docs/en/sql-manual/sql-functions/table-functions/explode-numbers.md diff --git a/docs/en/sql-reference/sql-functions/table-functions/explode-split.md b/docs/en/sql-manual/sql-functions/table-functions/explode-split.md similarity index 100% rename from 
docs/en/sql-reference/sql-functions/table-functions/explode-split.md rename to docs/en/sql-manual/sql-functions/table-functions/explode-split.md diff --git a/docs/en/sql-reference/sql-functions/table-functions/outer-combinator.md b/docs/en/sql-manual/sql-functions/table-functions/outer-combinator.md similarity index 100% rename from docs/en/sql-reference/sql-functions/table-functions/outer-combinator.md rename to docs/en/sql-manual/sql-functions/table-functions/outer-combinator.md diff --git a/docs/en/sql-reference/sql-functions/window-function.md b/docs/en/sql-manual/sql-functions/window-function.md similarity index 100% rename from docs/en/sql-reference/sql-functions/window-function.md rename to docs/en/sql-manual/sql-functions/window-function.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md 
b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md b/docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md rename to docs/en/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md 
b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md b/docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md rename to docs/en/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md 
b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md similarity index 100% 
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md 
b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md similarity index 100% 
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md diff --git 
a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md rename to docs/en/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md similarity index 100% 
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md rename to 
docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md b/docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/BIGINT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/BIGINT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/BIGINT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/BIGINT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/BITMAP.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/BITMAP.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/BITMAP.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/BITMAP.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/BOOLEAN.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/BOOLEAN.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/BOOLEAN.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/BOOLEAN.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/CHAR.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/CHAR.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/CHAR.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/CHAR.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/DATE.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/DATE.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/DATE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/DATE.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/DATETIME.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/DATETIME.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/DATETIME.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/DATETIME.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/DECIMAL.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/DECIMAL.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/DECIMAL.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/DECIMAL.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/DOUBLE.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/DOUBLE.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/DOUBLE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/DOUBLE.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/FLOAT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/FLOAT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/FLOAT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/FLOAT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/HLL.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/HLL.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/HLL.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/HLL.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/INT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/INT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/INT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/INT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/QUANTILE_STATE.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/QUANTILE_STATE.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/QUANTILE_STATE.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/QUANTILE_STATE.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/SMALLINT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/SMALLINT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/SMALLINT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/SMALLINT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/STRING.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/STRING.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/STRING.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/STRING.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/TINYINT.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/TINYINT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/TINYINT.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/TINYINT.md
diff --git a/docs/en/sql-reference/sql-statements/Data Types/VARCHAR.md b/docs/en/sql-manual/sql-reference-v2/Data-Types/VARCHAR.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Data Types/VARCHAR.md
rename to docs/en/sql-manual/sql-reference-v2/Data-Types/VARCHAR.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md similarity index 100% rename from 
new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md
diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW TABLET STORAGE FORMAT.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-TABLET-STORAGE-FORMAT.md
similarity index 100%
rename from docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW TABLET STORAGE FORMAT.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-TABLET-STORAGE-FORMAT.md
diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ENABLE-FEATURE.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ENABLE-FEATURE.md
similarity index 100%
rename from docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ENABLE-FEATURE.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/ENABLE-FEATURE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md b/docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md
rename to docs/en/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md
diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md
similarity index 100%
rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md
rename to
docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CHARSET.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CHARSET.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CHARSET.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CHARSET.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLLATION.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLLATION.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLLATION.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLLATION.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md rename to 
docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENGINES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENGINES.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EVENTS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EVENTS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md diff 
--git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-OPEN-TABLES.md 
b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-OPEN-TABLES.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCEDURE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCEDURE.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCEDURE.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCEDURE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-QUERY-PROFILE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-QUERY-PROFILE.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-QUERY-PROFILE.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-QUERY-PROFILE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md 
b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLLUP.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLLUP.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLLUP.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLLUP.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md 
b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRIGGERS.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRIGGERS.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRIGGERS.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRIGGERS.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-USER.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-USER.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md similarity index 100% rename from 
new-docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-WARNING.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WARNING.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-WARNING.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WARNING.md diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-WHITE-LIST.md b/docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WHITE-LIST.md similarity index 100% rename from docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-WHITE-LIST.md rename to docs/en/sql-manual/sql-reference-v2/Show-Statements/SHOW-WHITE-LIST.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md b/docs/en/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md rename to docs/en/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/HELP.md b/docs/en/sql-manual/sql-reference-v2/Utility-Statements/HELP.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/HELP.md rename to docs/en/sql-manual/sql-reference-v2/Utility-Statements/HELP.md diff --git a/new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/USE.md b/docs/en/sql-manual/sql-reference-v2/Utility-Statements/USE.md similarity index 100% rename from new-docs/en/sql-manual/sql-reference-v2/Utility-Statements/USE.md rename to docs/en/sql-manual/sql-reference-v2/Utility-Statements/USE.md diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md deleted file mode 100644 index e1690d81e9..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-USER", - "language": "en" -} ---- - - - -## ALTER-USER - -### Description - -### Example - -### Keywords - - ALTER, USER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md deleted file mode 100644 index b6fdbc3ad1..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-ROLE", - "language": "en" -} ---- - - - -## CREATE-ROLE - -### Description - -### Example - -### Keywords - - CREATE, ROLE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md deleted file mode 100644 index ade67b5f43..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-USER", - "language": "en" -} ---- - - - -## CREATE-USER - -### Description - -### Example - -### Keywords - - CREATE, USER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md 
b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md deleted file mode 100644 index 16fab0dcfa..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-ROLE", - "language": "en" -} ---- - - - -## DROP-ROLE - -### Description - -### Example - -### Keywords - - DROP, ROLE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md deleted file mode 100644 index 9ae0d1e76c..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-USER", - "language": "en" -} ---- - - - -## DROP-USER - -### Description - -### Example - -### Keywords - - DROP, USER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md deleted file mode 100644 index db5180221d..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "GRANT", - "language": "en" -} ---- - - - -## GRANT - -### Description - -### Example - -### Keywords - - GRANT - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md deleted file mode 100644 index 08066c9e53..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "LDAP", - "language": "en" -} ---- - - - -## LDAP - -### Description - -### Example - -### Keywords - - LDAP - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md deleted file mode 100644 index e29f278220..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "REVOKE", - "language": "en" -} ---- - - - -## REVOKE - -### Description - -### Example - -### Keywords - - REVOKE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md deleted file mode 100644 index ca52125b77..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SET-PASSWORD", - "language": "en" -} ---- - - - -## SET-PASSWORD - -### Description - -### Example - -### Keywords - - SET, PASSWORD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md b/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md deleted file mode 100644 index 47732877e6..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SET-PROPERTY", - "language": "en" -} ---- - - - -## SET-PROPERTY - -### Description - -### Example - -### Keywords - - SET, PROPERTY - -### Best Practice - diff --git 
a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md deleted file mode 100644 index 8673f99d5b..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-ADD-BACKEND", - "language": "en" -} ---- - - - -## ALTER-SYSTEM-ADD-BACKEND - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, ADD, BACKEND - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md deleted file mode 100644 index d90642ce59..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-ADD-FOLLOWER", - "language": "en" -} ---- - - - -## ALTER-SYSTEM-ADD-FOLLOWER - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, ADD, FOLLOWER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md deleted file mode 100644 index 439e3566a3..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-ADD-OBSERVER", - "language": "en" -} ---- - - - -## ALTER-SYSTEM-ADD-OBSERVER - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, ADD, OBSERVER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md deleted file mode 100644 index 5ae550f0c4..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-DECOMMISSION-BACKEND", - "language": "en" -} ---- - - - -## ALTER-SYSTEM-DECOMMISSION-BACKEND - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, DECOMMISSION, BACKEND - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md deleted file mode 100644 index e2f2e0bc44..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-DROP-BACKEND", - "language": "en" -} ---- - - - -## ALTER-SYSTEM-DROP-BACKEND - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, DROP, BACKEND - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md deleted file mode 100644 index 9999fa40ad..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - 
"title": "ALTER-SYSTEM-DROP-FOLLOWER", - "language": "en" -} ---- - - - -## ALTER-SYSTEM-DROP-FOLLOWER - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, DROP, FOLLOWER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md deleted file mode 100644 index 277f8096f8..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-DROP-OBSERVER", - "language": "en" -} ---- - - - -## ALTER-SYSTEM-DROP-OBSERVER - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, DROP, OBSERVER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md b/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md deleted file mode 100644 index 1b66326d55..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-ALTER-SYSTEM", - "language": "en" -} ---- - - - -## CANCEL-ALTER-SYSTEM - -### Description - -### Example - -### Keywords - - CANCEL, ALTER, SYSTEM - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md deleted file mode 100644 index 527f4ac1ee..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-DATABASE", - "language": "en" -} ---- - - - -## ALTER-DATABASE - -### Description - -### Example - -### Keywords - - ALTER, DATABASE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md deleted file mode 100644 index 48fdae2ee0..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-COLUMN", - "language": "en" -} ---- - - - -## ALTER-TABLE-COLUMN - -### Description - -### Example - -### Keywords - - ALTER, TABLE, COLUMN - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md deleted file mode 100644 index df6150a5d0..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-PARTITION", - "language": "en" -} ---- - - - -## ALTER-TABLE-PARTITION - -### Description - -### Example - -### Keywords - - ALTER, TABLE, PARTITION - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md deleted file mode 100644 index fe094ecab3..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md +++ /dev/null @@ 
-1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-PROPERTY", - "language": "en" -} ---- - - - -## ALTER-TABLE-PROPERTY - -### Description - -### Example - -### Keywords - - ALTER, TABLE, PROPERTY - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md deleted file mode 100644 index dfaae33b28..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-RENAME", - "language": "en" -} ---- - - - -## ALTER-TABLE-RENAME - -### Description - -### Example - -### Keywords - - ALTER, TABLE, RENAME - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md deleted file mode 100644 index e383908221..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-REPLACE", - "language": "en" -} ---- - - - -## ALTER-TABLE-REPLACE - -### Description - -### Example - -### Keywords - - ALTER, TABLE, REPLACE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md deleted file mode 100644 index 99ab24c164..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-ROLLUP", - "language": "en" -} ---- - - - -## ALTER-TABLE-ROLLUP - -### Description - -### Example - -### Keywords - - ALTER, TABLE, ROLLUP - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md deleted file mode 100644 index 23ab56ba07..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-VIEW", - "language": "en" -} ---- - - - -## ALTER-VIEW - -### Description - -### Example - -### Keywords - - ALTER, VIEW - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md deleted file mode 100644 index ca6777e2fb..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-ALTER-TABLE", - "language": "en" -} ---- - - - -## CANCEL-ALTER-TABLE - -### Description - -### Example - -### Keywords - - CANCEL, ALTER, TABLE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md deleted file mode 100644 index e4f00c442c..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "BACKUP", - "language": "en" -} ---- - 
- - -## BACKUP - -### Description - -### Example - -### Keywords - - BACKUP - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md deleted file mode 100644 index bb6d9448fc..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-BACKUP", - "language": "en" -} ---- - - - -## CANCEL-BACKUP - -### Description - -### Example - -### Keywords - - CANCEL, BACKUP - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md deleted file mode 100644 index fa50eca878..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-RESTORE", - "language": "en" -} ---- - - - -## CANCEL-RESTORE - -### Description - -### Example - -### Keywords - - CANCEL, RESTORE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md deleted file mode 100644 index 611bfab2fc..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-REPOSITORY", - "language": "en" -} ---- - - - -## CREATE-REPOSITORY - -### Description - -### Example - -### Keywords - - CREATE, REPOSITORY - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md deleted file mode 100644 index adf837436a..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-REPOSITORY", - "language": "en" -} ---- - - - -## DROP-REPOSITORY - -### Description - -### Example - -### Keywords - - DROP, REPOSITORY - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md deleted file mode 100644 index 9ff29b3f98..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "RESTORE", - "language": "en" -} ---- - - - -## RESTORE - -### Description - -### Example - -### Keywords - - RESTORE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md deleted file mode 100644 index b4b092baca..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-DATABASE", - "language": "en" -} ---- - - - -## CREATE-DATABASE - 
-### Description - -### Example - -### Keywords - - CREATE, DATABASE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md deleted file mode 100644 index 5bbc792e74..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-ENCRYPT-KEY", - "language": "en" -} ---- - - - -## CREATE-ENCRYPT-KEY - -### Description - -### Example - -### Keywords - - CREATE, ENCRYPT, KEY - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md deleted file mode 100644 index 2003bab853..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-FILE", - "language": "en" -} ---- - - - -## CREATE-FILE - -### Description - -### Example - -### Keywords - - CREATE, FILE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md deleted file mode 100644 index 315acc4c92..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-FUNCTION", - "language": "en" -} ---- - - - -## CREATE-FUNCTION - -### Description - -### Example - -### Keywords - - CREATE, FUNCTION - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md deleted file mode 100644 index 292ecbc9e1..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-INDEX", - "language": "en" -} ---- - - - -## CREATE-INDEX - -### Description - -### Example - -### Keywords - - CREATE, INDEX - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md deleted file mode 100644 index 03223ea223..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-MATERIALIZED-VIEW", - "language": "en" -} ---- - - - -## CREATE-MATERIALIZED-VIEW - -### Description - -### Example - -### Keywords - - CREATE, MATERIALIZED, VIEW - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md deleted file mode 100644 index 39593772a1..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-RESOURCE", - "language": "en" -} ---- - - - -## CREATE-RESOURCE - -### Description - -### Example - -### Keywords - - CREATE, RESOURCE - -### Best Practice - diff --git 
a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md deleted file mode 100644 index 156020f7de..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-TABLE-LIKE", - "language": "en" -} ---- - - - -## CREATE-TABLE-LIKE - -### Description - -### Example - -### Keywords - - CREATE, TABLE, LIKE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md deleted file mode 100644 index 72916ef337..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md +++ /dev/null @@ -1,568 +0,0 @@ ---- -{ - "title": "CREATE-TABLE", - "language": "en" -} ---- - - - -## CREATE-TABLE - -### Description - -This command is used to create a table. The subject of this document describes the syntax for creating Doris self-maintained tables. For external table syntax, please refer to the [CREATE-EXTERNAL-TABLE] document. - -```sql -CREATE TABLE [IF NOT EXISTS] [database.]table -( - column_definition_list, - [index_definition_list] -) -[engine_type] -[keys_type] -[table_comment] -[partition_info] -distribution_info -[rollup_list] -[properties] -[extra_properties] -``` - -* `column_definition_list` - - Column definition list: - - `column_definition[, column_definition]` - - * `column_definition` - - Column definition: - - `column_name column_type [KEY] [aggr_type] [NULL] [default_value] [column_comment]` - - * `column_type` - - Column type, the following types are supported: - - ``` - TINYINT (1 byte) - Range: -2^7 + 1 ~ 2^7-1 - SMALLINT (2 bytes) - Range: -2^15 + 1 ~ 2^15-1 - INT (4 bytes) - Range: -2^31 + 1 ~ 2^31-1 - BIGINT (8 bytes) - Range: -2^63 + 1 ~ 2^63-1 - LARGEINT (16 bytes) - Range: -2^127 + 1 ~ 2^127-1 - FLOAT (4 bytes) - Support scientific notation - DOUBLE (12 bytes) - Support scientific notation - DECIMAL[(precision, scale)] (16 bytes) - The decimal type with guaranteed precision. The default is DECIMAL(10, 0) - precision: 1 ~ 27 - scale: 0 ~ 9 - Where the integer part is 1 ~ 18 - Does not support scientific notation - DATE (3 bytes) - Range: 0000-01-01 ~ 9999-12-31 - DATETIME (8 bytes) - Range: 0000-01-01 00:00:00 ~ 9999-12-31 23:59:59 - CHAR[(length)] - Fixed-length character string. Length range: 1 ~ 255. Default is 1 - VARCHAR[(length)] - Variable length character string. Length range: 1 ~ 65533. Default is 1 - HLL (1~16385 bytes) - HyperLogLog column type, do not need to specify the length and default value. The length is controlled within the system according to the degree of data aggregation. - Must be used with HLL_UNION aggregation type. - BITMAP - The bitmap column type does not need to specify the length and default value. Represents a collection of integers, and the maximum number of elements supported is 2^64-1. - Must be used with BITMAP_UNION aggregation type. - ``` - - * `aggr_type` - - Aggregation type, the following aggregation types are supported: - - ``` - SUM: Sum. Applicable numeric types. - MIN: Find the minimum value. Suitable for numeric types. - MAX: Find the maximum value. Suitable for numeric types. - REPLACE: Replace. 
For rows with the same dimension column, the index column will be imported in the order of import, and the last imported will replace the first imported. - REPLACE_IF_NOT_NULL: non-null value replacement. The difference with REPLACE is that there is no replacement for null values. It should be noted here that the default value should be NULL, not an empty string. If it is an empty string, you should replace it with an empty string. - HLL_UNION: The aggregation method of HLL type columns, aggregated by HyperLogLog algorithm. - BITMAP_UNION: The aggregation mode of BIMTAP type columns, which performs the union aggregation of bitmaps. - ``` - - Example: - - ``` - k1 TINYINT, - k2 DECIMAL(10,2) DEFAULT "10.5", - k4 BIGINT NULL DEFAULT VALUE "1000" COMMENT "This is column k4", - v1 VARCHAR(10) REPLACE NOT NULL, - v2 BITMAP BITMAP_UNION, - v3 HLL HLL_UNION, - v4 INT SUM NOT NULL DEFAULT "1" COMMENT "This is column v4" - ``` - -* `index_definition_list` - - Index list definition: - - `index_definition[, index_definition]` - - * `index_definition` - - Index definition: - - ```sql - INDEX index_name (col_name) [USING BITMAP] COMMENT'xxxxxx' - ``` - - Example: - - ```sql - INDEX idx1 (k1) USING BITMAP COMMENT "This is a bitmap index1", - INDEX idx2 (k2) USING BITMAP COMMENT "This is a bitmap index2", - ... - ``` - -* `engine_type` - - Table engine type. All types in this document are OLAP. For other external table engine types, see [CREATE EXTERNAL TABLE] (DORIS/SQL Manual/Syntax Help/DDL/CREATE-EXTERNAL-TABLE.md) document. Example: - - `ENGINE=olap` - -* `key_desc` - - Data model. - - `key_type(col1, col2, ...)` - - `key_type` supports the following models: - - * DUPLICATE KEY (default): The subsequent specified column is the sorting column. - * AGGREGATE KEY: The specified column is the dimension column. - * UNIQUE KEY: The subsequent specified column is the primary key column. - - Example: - - ``` - DUPLICATE KEY(col1, col2), - AGGREGATE KEY(k1, k2, k3), - UNIQUE KEY(k1, k2) - ``` - -* `table_comment` - - Table notes. Example: - - ``` - COMMENT "This is my first DORIS table" - ``` - -* `partition_desc` - - Partition information supports two writing methods: - - 1. LESS THAN: Only define the upper boundary of the partition. The lower bound is determined by the upper bound of the previous partition. - - ``` - PARTITION BY RANGE(col1[, col2, ...]) - ( - PARTITION partition_name1 VALUES LESS THAN MAXVALUE|("value1", "value2", ...), - PARTITION partition_name2 VALUES LESS THAN MAXVALUE|("value1", "value2", ...) - ) - ``` - - 2. FIXED RANGE: Define the left closed and right open interval of the zone. - - ``` - PARTITION BY RANGE(col1[, col2, ...]) - ( - PARTITION partition_name1 VALUES [("k1-lower1", "k2-lower1", "k3-lower1",...), ("k1-upper1", "k2-upper1", "k3-upper1", ... )), - PARTITION partition_name2 VALUES [("k1-lower1-2", "k2-lower1-2", ...), ("k1-upper1-2", MAXVALUE, )) - ) - ``` - -* `distribution_desc` - - Define the data bucketing method. - - `DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]` - -* `rollup_list` - - Multiple materialized views (ROLLUP) can be created at the same time as the table is built. - - `ROLLUP (rollup_definition[, rollup_definition, ...])` - - * `rollup_definition` - - `rollup_name (col1[, col2, ...]) [DUPLICATE KEY(col1[, col2, ...])] [PROPERTIES("key" = "value")]` - - Example: - - ``` - ROLLUP ( - r1 (k1, k3, v1, v2), - r2 (k1, v1) - ) - ``` - -* `properties` - - Set table properties. 
The following attributes are currently supported: - - * `replication_num` - - Number of copies. The default number of copies is 3. If the number of BE nodes is less than 3, you need to specify that the number of copies is less than or equal to the number of BE nodes. - - After version 0.15, this attribute will be automatically converted to the `replication_allocation` attribute, such as: - - `"replication_num" = "3"` will be automatically converted to `"replication_allocation" = "tag.location.default:3"` - - * `replication_allocation` - - Set the copy distribution according to Tag. This attribute can completely cover the function of the `replication_num` attribute. - - * `storage_medium/storage_cooldown_time` - - Data storage medium. `storage_medium` is used to declare the initial storage medium of the table data, and `storage_cooldown_time` is used to set the expiration time. Example: - - ``` - "storage_medium" = "SSD", - "storage_cooldown_time" = "2020-11-20 00:00:00" - ``` - - This example indicates that the data is stored in the SSD and will be automatically migrated to the HDD storage after the expiration of 2020-11-20 00:00:00. - - * `colocate_with` - - When you need to use the Colocation Join function, use this parameter to set the Colocation Group. - - `"colocate_with" = "group1"` - - * `bloom_filter_columns` - - The user specifies the list of column names that need to be added to the Bloom Filter index. The Bloom Filter index of each column is independent, not a composite index. - - `"bloom_filter_columns" = "k1, k2, k3"` - - * `in_memory` - - Use this property to set whether the table is [Memory Table] (DORIS/Operation Manual/Memory Table.md). - - `"in_memory" = "true"` - - * `function_column.sequence_type` - - When using the UNIQUE KEY model, you can specify a sequence column. When the KEY columns are the same, REPLACE will be performed according to the sequence column (the larger value replaces the smaller value, otherwise it cannot be replaced) - - Here we only need to specify the type of sequence column, support time type or integer type. Doris will create a hidden sequence column. - - `"function_column.sequence_type" ='Date'` - - * Dynamic partition related - - The relevant parameters of dynamic partition are as follows: - - * `dynamic_partition.enable`: Used to specify whether the dynamic partition function at the table level is enabled. The default is true. - * `dynamic_partition.time_unit:` is used to specify the time unit for dynamically adding partitions, which can be selected as DAY (day), WEEK (week), MONTH (month), HOUR (hour). - * `dynamic_partition.start`: Used to specify how many partitions to delete forward. The value must be less than 0. The default is Integer.MIN_VALUE. - * `dynamic_partition.end`: Used to specify the number of partitions created in advance. The value must be greater than 0. - * `dynamic_partition.prefix`: Used to specify the partition name prefix to be created. For example, if the partition name prefix is ​​p, the partition name will be automatically created as p20200108. - * `dynamic_partition.buckets`: Used to specify the number of partition buckets that are automatically created. - * `dynamic_partition.create_history_partition`: Whether to create a history partition. - * `dynamic_partition.history_partition_num`: Specify the number of historical partitions to be created. - * `dynamic_partition.reserved_history_periods`: Used to specify the range of reserved history periods. 
- - * Data Sort Info - - The relevant parameters of data sort info are as follows: - - * `data_sort.sort_type`: the method of data sorting, options: z-order/lexical, default is lexical - * `data_sort.col_num`: the first few columns to sort, col_num muster less than total key counts - -### Example - -1. Create a detailed model table - - ```sql - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - k3 CHAR(10) COMMENT "string column", - k4 INT NOT NULL DEFAULT "1" COMMENT "int column" - ) - COMMENT "my first table" - DISTRIBUTED BY HASH(k1) BUCKETS 32 - ``` - -2. Create a detailed model table, partition, specify the sorting column, and set the number of copies to 1 - - ```sql - CREATE TABLE example_db.table_hash - ( - k1 DATE, - k2 DECIMAL(10, 2) DEFAULT "10.5", - k3 CHAR(10) COMMENT "string column", - k4 INT NOT NULL DEFAULT "1" COMMENT "int column" - ) - DUPLICATE KEY(k1, k2) - COMMENT "my first table" - PARTITION BY RANGE(k1) - ( - PARTITION p1 VALUES LESS THAN ("2020-02-01"), - PARTITION p1 VALUES LESS THAN ("2020-03-01"), - PARTITION p1 VALUES LESS THAN ("2020-04-01") - ) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ( - "replication_num" = "1" - ); - ``` - -3. Create a table with a unique model of the primary key, set the initial storage medium and cooling time - - ```sql - CREATE TABLE example_db.table_hash - ( - k1 BIGINT, - k2 LARGEINT, - v1 VARCHAR(2048) REPLACE, - v2 SMALLINT SUM DEFAULT "10" - ) - UNIQUE KEY(k1, k2) - DISTRIBUTED BY HASH (k1, k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", - "storage_cooldown_time" = "2015-06-04 00:00:00" - ); - ``` - -4. Create an aggregate model table, using a fixed range partition description - - ```sql - CREATE TABLE table_range - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048) REPLACE, - v2 INT SUM DEFAULT "1" - ) - AGGREGATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1, k2, k3) - ( - PARTITION p1 VALUES [("2014-01-01", "10", "200"), ("2014-01-01", "20", "300")), - PARTITION p2 VALUES [("2014-06-01", "100", "200"), ("2014-07-01", "100", "300")) - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - ``` - -5. Create an aggregate model table with HLL and BITMAP column types - - ```sql - CREATE TABLE example_db.example_table - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 HLL HLL_UNION, - v2 BITMAP BITMAP_UNION - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - ``` - -6. Create two self-maintained tables of the same Colocation Group. - - ```sql - CREATE TABLE t1 ( - id int(11) COMMENT "", - value varchar(8) COMMENT "" - ) - DUPLICATE KEY(id) - DISTRIBUTED BY HASH(id) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "group1" - ); - - CREATE TABLE t2 ( - id int(11) COMMENT "", - value1 varchar(8) COMMENT "", - value2 varchar(8) COMMENT "" - ) - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "group1" - ); - ``` - -7. Create a memory table with bitmap index and bloom filter index - - ```sql - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 CHAR(10) REPLACE, - v2 INT SUM, - INDEX k1_idx (k1) USING BITMAP COMMENT'my first index' - ) - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ( - "bloom_filter_columns" = "k2", - "in_memory" = "true" - ); - ``` - -8. Create a dynamic partition table. - - The table creates partitions 3 days in advance every day, and deletes the partitions 3 days ago. 
For example, if today is `2020-01-08`, partitions named `p20200108`, `p20200109`, `p20200110`, `p20200111` will be created. The partition ranges are: - - ``` - [types: [DATE]; keys: [2020-01-08]; ‥types: [DATE]; keys: [2020-01-09];) - [types: [DATE]; keys: [2020-01-09]; ‥types: [DATE]; keys: [2020-01-10];) - [types: [DATE]; keys: [2020-01-10]; ‥types: [DATE]; keys: [2020-01-11];) - [types: [DATE]; keys: [2020-01-11]; ‥types: [DATE]; keys: [2020-01-12];) - ``` - - ```sql - CREATE TABLE example_db.dynamic_partition - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - DUPLICATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1) () - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-3", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32" - ); - ``` - -9. Create a table with a materialized view (ROLLUP). - - ```sql - CREATE TABLE example_db.rolup_index_table - ( - event_day DATE, - siteid INT DEFAULT '10', - citycode SMALLINT, - username VARCHAR(32) DEFAULT'', - pv BIGINT SUM DEFAULT '0' - ) - AGGREGATE KEY(event_day, siteid, citycode, username) - DISTRIBUTED BY HASH(siteid) BUCKETS 10 - ROLLUP ( - r1(event_day,siteid), - r2(event_day,citycode), - r3(event_day) - ) - PROPERTIES("replication_num" = "3"); - ``` - -10. Set the replica of the table through the `replication_allocation` property. - - ```sql - CREATE TABLE example_db.table_hash - ( - - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5" - ) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ( - "replication_allocation"="tag.location.group_a:1, tag.location.group_b:2" - ); - - CREATE TABLE example_db.dynamic_partition - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - PARTITION BY RANGE (k1) () - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-3", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32", - "dynamic_partition."replication_allocation" = "tag.location.group_a:3" - ); - ``` -### Keywords - - CREATE, TABLE - -### Best Practice - -#### Partitioning and bucketing - -A table must specify the bucket column, but it does not need to specify the partition. For the specific introduction of partitioning and bucketing, please refer to the [Data Division] (DORIS/Getting Started/Relational Model and Data Division.md) document. - -Tables in Doris can be divided into partitioned tables and non-partitioned tables. This attribute is determined when the table is created and cannot be changed afterwards. That is, for partitioned tables, you can add or delete partitions in the subsequent use process, and for non-partitioned tables, you can no longer perform operations such as adding partitions afterwards. - -At the same time, partitioning columns and bucketing columns cannot be changed after the table is created. You can neither change the types of partitioning and bucketing columns, nor do any additions or deletions to these columns. - -Therefore, it is recommended to confirm the usage method to build the table reasonably before building the table. - -#### Dynamic Partition - -The dynamic partition function is mainly used to help users automatically manage partitions. By setting certain rules, the Doris system regularly adds new partitions or deletes historical partitions. 
Please refer to [Dynamic Partition] (DORIS/Operation Manual/Dynamic Partition.md) document for more help. - -#### Materialized View - -Users can create multiple materialized views (ROLLUP) while building a table. Materialized views can also be added after the table is built. It is convenient for users to create all materialized views at one time by writing in the table creation statement. - -If the materialized view is created when the table is created, all subsequent data import operations will synchronize the data of the materialized view to be generated. The number of materialized views may affect the efficiency of data import. - -If you add a materialized view in the subsequent use process, if there is data in the table, the creation time of the materialized view depends on the current amount of data. - -For the introduction of materialized views, please refer to the document [materialized views] (DORIS/Operation Manual/materialized views.md). - -#### Index - -Users can create indexes on multiple columns while building a table. Indexes can also be added after the table is built. - -If you add an index in the subsequent use process, if there is data in the table, you need to rewrite all the data, so the creation time of the index depends on the current data volume. - -#### Memory table - -The `"in_memory" = "true"` attribute was specified when the table was created. Doris will try to cache the data blocks of the table in the PageCache of the storage engine, which has reduced disk IO. However, this attribute does not guarantee that the data block is permanently resident in memory, and is only used as a best-effort identification. diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md deleted file mode 100644 index e87ca33a8c..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-VIEW", - "language": "en" -} ---- - - - -## CREATE-VIEW - -### Description - -### Example - -### Keywords - - CREATE, VIEW - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md deleted file mode 100644 index fae4759016..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-DATABASE", - "language": "en" -} ---- - - - -## DROP-DATABASE - -### Description - -### Example - -### Keywords - - DROP, DATABASE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md deleted file mode 100644 index 843c93c8f2..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-ENCRYPT-KEY", - "language": "en" -} ---- - - - -## DROP-ENCRYPT-KEY - -### Description - -### Example - -### Keywords - - DROP, ENCRYPT, KEY - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md deleted file mode 100644 index 
cdcfe5963d..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-FILE", - "language": "en" -} ---- - - - -## DROP-FILE - -### Description - -### Example - -### Keywords - - DROP, FILE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md deleted file mode 100644 index e828d61c96..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-FUNCTION", - "language": "en" -} ---- - - - -## DROP-FUNCTION - -### Description - -### Example - -### Keywords - - DROP, FUNCTION - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md deleted file mode 100644 index 9be0add1b4..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-INDEX", - "language": "en" -} ---- - - - -## DROP-INDEX - -### Description - -### Example - -### Keywords - - DROP, INDEX - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md deleted file mode 100644 index be27fddead..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-MATERIALIZED-VIEW", - "language": "en" -} ---- - - - -## DROP-MATERIALIZED-VIEW - -### Description - -### Example - -### Keywords - - DROP, MATERIALIZED, VIEW - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md deleted file mode 100644 index 9547f92552..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-RESOURCE", - "language": "en" -} ---- - - - -## DROP-RESOURCE - -### Description - -### Example - -### Keywords - - DROP, RESOURCE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md deleted file mode 100644 index 72c116f750..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-TABLE", - "language": "en" -} ---- - - - -## DROP-TABLE - -### Description - -### Example - -### Keywords - - DROP, TABLE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md deleted file mode 100644 index 448ff667c0..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "TRUNCATE-TABLE", - "language": "en" -} ---- - - - -## 
TRUNCATE-TABLE - -### Description - -### Example - -### Keywords - - TRUNCATE, TABLE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md deleted file mode 100644 index 8092c7644b..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-ROUTINE-LOAD", - "language": "en" -} ---- - - - -## ALTER-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - ALTER, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md deleted file mode 100644 index bad06e60a8..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "BROKER-LOAD", - "language": "en" -} ---- - - - -## BROKER-LOAD - -### Description - -### Example - -### Keywords - - BROKER, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md deleted file mode 100644 index dfa1b883eb..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-LOAD", - "language": "en" -} ---- - - - -## CANCEL-LOAD - -### Description - -### Example - -### Keywords - - CANCEL, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md deleted file mode 100644 index c4f1f52119..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-ROUTINE-LOAD", - "language": "en" -} ---- - - - -## CREATE-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - CREATE, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md deleted file mode 100644 index 63055f687c..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "PAUSE-ROUTINE-LOAD", - "language": "en" -} ---- - - - -## PAUSE-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - PAUSE, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md deleted file mode 100644 index ee49c95db8..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "RESUME-ROUTINE-LOAD", - "language": "en" -} ---- - - - -## RESUME-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - RESUME, ROUTINE, LOAD - -### 
Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md deleted file mode 100644 index e8cd90cdec..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "STOP-ROUTINE-LOAD", - "language": "en" -} ---- - - - -## STOP-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - STOP, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md deleted file mode 100644 index 6e7e24522b..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "STREAM-LOAD", - "language": "en" -} ---- - - - -## STREAM-LOAD - -### Description - -### Example - -### Keywords - - STREAM, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md deleted file mode 100644 index e233d84024..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DELETE", - "language": "en" -} ---- - - - -## DELETE - -### Description - -### Example - -### Keywords - - DELETE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md deleted file mode 100644 index 9931392c45..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "INSERT", - "language": "en" -} ---- - - - -## INSERT - -### Description - -### Example - -### Keywords - - INSERT - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md b/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md deleted file mode 100644 index bf608a8849..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "UPDATE", - "language": "en" -} ---- - - - -## UPDATE - -### Description - -### Example - -### Keywords - - UPDATE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md deleted file mode 100644 index 33eaed0cce..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-CANCEL-REPAIR", - "language": "en" -} ---- - - - -## ADMIN-CANCEL-REPAIR - -### Description - -### Example - -### Keywords - - ADMIN, CANCEL, REPAIR - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md 
b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md deleted file mode 100644 index 97aaa56b51..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-CHECK-TABLET", - "language": "en" -} ---- - - - -## ADMIN-CHECK-TABLET - -### Description - -### Example - -### Keywords - - ADMIN, CHECK, TABLET - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md deleted file mode 100644 index 532befbc0e..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-REPAIR-TABLE", - "language": "en" -} ---- - - - -## ADMIN-REPAIR-TABLE - -### Description - -### Example - -### Keywords - - ADMIN, REPAIR, TABLE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md deleted file mode 100644 index abf9b768f7..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SET-CONFIG", - "language": "en" -} ---- - - - -## ADMIN-SET-CONFIG - -### Description - -### Example - -### Keywords - - ADMIN, SET, CONFIG - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md deleted file mode 100644 index eb6a7ad75a..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SET-REPLICA-STATUS", - "language": "en" -} ---- - - - -## ADMIN-SET-REPLICA-STATUS - -### Description - -### Example - -### Keywords - - ADMIN, SET, REPLICA, STATUS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md deleted file mode 100644 index 56aac70c96..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SHOW-CONFIG", - "language": "en" -} ---- - - - -## ADMIN-SHOW-CONFIG - -### Description - -### Example - -### Keywords - - ADMIN, SHOW, CONFIG - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md deleted file mode 100644 index 0312d5dd01..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SHOW-REPLICA-DISTRIBUTION", - "language": "en" -} ---- - - - -## ADMIN-SHOW-REPLICA-DISTRIBUTION - -### Description - -### Example - -### Keywords - - ADMIN, SHOW, REPLICA, DISTRIBUTION - -### Best Practice - diff --git 
a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md deleted file mode 100644 index 17366282da..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SHOW-REPLICA-STATUS", - "language": "en" -} ---- - - - -## ADMIN-SHOW-REPLICA-STATUS - -### Description - -### Example - -### Keywords - - ADMIN, SHOW, REPLICA, STATUS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md deleted file mode 100644 index a7b5d49a91..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "INSTALL-PLUGIN", - "language": "en" -} ---- - - - -## INSTALL-PLUGIN - -### Description - -### Example - -### Keywords - - INSTALL, PLUGIN - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md deleted file mode 100644 index fe93c90be8..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "KILL", - "language": "en" -} ---- - - - -## KILL - -### Description - -### Example - -### Keywords - - KILL - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md deleted file mode 100644 index ccfe365973..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "RECOVER", - "language": "en" -} ---- - - - -## RECOVER - -### Description - -### Example - -### Keywords - - RECOVER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md deleted file mode 100644 index 4c59725a02..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SET-VARIABLE", - "language": "en" -} ---- - - - -## SET-VARIABLE - -### Description - -### Example - -### Keywords - - SET, VARIABLE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md b/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md deleted file mode 100644 index ab482e4661..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "UNINSTALL-PLUGIN", - "language": "en" -} ---- - - - -## UNINSTALL-PLUGIN - -### Description - -### Example - -### Keywords - - UNINSTALL, PLUGIN - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md deleted file mode 100644 index 
93ee8dd89c..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ALTER", - "language": "en" -} ---- - - - -## SHOW-ALTER - -### Description - -### Example - -### Keywords - - SHOW, ALTER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md deleted file mode 100644 index 3c86aaaa7f..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-BACKENDS", - "language": "en" -} ---- - - - -## SHOW-BACKENDS - -### Description - -### Example - -### Keywords - - SHOW, BACKENDS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md deleted file mode 100644 index b8583ee74b..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-BACKUP", - "language": "en" -} ---- - - - -## SHOW-BACKUP - -### Description - -### Example - -### Keywords - - SHOW, BACKUP - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md deleted file mode 100644 index 4fe3a7f5c6..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-BROKER", - "language": "en" -} ---- - - - -## SHOW-BROKER - -### Description - -### Example - -### Keywords - - SHOW, BROKER - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md deleted file mode 100644 index f9ad64cf52..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-COLUMNS", - "language": "en" -} ---- - - - -## SHOW-COLUMNS - -### Description - -### Example - -### Keywords - - SHOW, COLUMNS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md deleted file mode 100644 index 53e4ec81ad..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-CREATE-DATABASE", - "language": "en" -} ---- - - - -## SHOW-CREATE-DATABASE - -### Description - -### Example - -### Keywords - - SHOW, CREATE, DATABASE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md deleted file mode 100644 index 3ebe2b8234..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-CREATE-FUNCTION", - "language": "en" -} ---- - - - -## SHOW-CREATE-FUNCTION - -### Description - -### Example - -### Keywords - - SHOW, CREATE, FUNCTION - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md 
b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md deleted file mode 100644 index 8a0200b7c4..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-CREATE-ROUTINE-LOAD", - "language": "en" -} ---- - - - -## SHOW-CREATE-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - SHOW, CREATE, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md deleted file mode 100644 index 67587b102a..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-CREATE-TABLE", - "language": "en" -} ---- - - - -## SHOW-CREATE-TABLE - -### Description - -### Example - -### Keywords - - SHOW, CREATE, TABLE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md deleted file mode 100644 index cf03a8c76e..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DATA", - "language": "en" -} ---- - - - -## SHOW-DATA - -### Description - -### Example - -### Keywords - - SHOW, DATA - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md deleted file mode 100644 index 2279b4de52..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DATABASE-ID", - "language": "en" -} ---- - - - -## SHOW-DATABASE-ID - -### Description - -### Example - -### Keywords - - SHOW, DATABASE, ID - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md deleted file mode 100644 index 1e8f6705da..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DATABASES", - "language": "en" -} ---- - - - -## SHOW-DATABASES - -### Description - -### Example - -### Keywords - - SHOW, DATABASES - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md deleted file mode 100644 index cdae222606..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DELETE", - "language": "en" -} ---- - - - -## SHOW-DELETE - -### Description - -### Example - -### Keywords - - SHOW, DELETE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md deleted file mode 100644 index 2fccdba2d1..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DYNAMIC-PARTITION", - "language": "en" -} ---- - - - -## SHOW-DYNAMIC-PARTITION - -### Description - -### Example - -### Keywords - - SHOW, DYNAMIC, PARTITION - -### 
Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md deleted file mode 100644 index f45ab611c3..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ENCRYPT-KEY", - "language": "en" -} ---- - - - -## SHOW-ENCRYPT-KEY - -### Description - -### Example - -### Keywords - - SHOW, ENCRYPT, KEY - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md deleted file mode 100644 index fab358e3f7..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-EXPORT", - "language": "en" -} ---- - - - -## SHOW-EXPORT - -### Description - -### Example - -### Keywords - - SHOW, EXPORT - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md deleted file mode 100644 index a946cd3230..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-FRONTENDS", - "language": "en" -} ---- - - - -## SHOW-FRONTENDS - -### Description - -### Example - -### Keywords - - SHOW, FRONTENDS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md deleted file mode 100644 index 82f2dd7f8d..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-FUNCTIONS", - "language": "en" -} ---- - - - -## SHOW-FUNCTIONS - -### Description - -### Example - -### Keywords - - SHOW, FUNCTIONS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md deleted file mode 100644 index 3bee8512c1..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-GRANTS", - "language": "en" -} ---- - - - -## SHOW-GRANTS - -### Description - -### Example - -### Keywords - - SHOW, GRANTS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md deleted file mode 100644 index 9b4f239fcf..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-INDEX", - "language": "en" -} ---- - - - -## SHOW-INDEX - -### Description - -### Example - -### Keywords - - SHOW, INDEX - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md deleted file mode 100644 index 9bdcad5da3..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-LOAD-PROFILE", - "language": "en" -} ---- - - - -## SHOW-LOAD-PROFILE - -### Description - -### Example - -### Keywords - - SHOW, LOAD, PROFILE - -### Best 
Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md deleted file mode 100644 index 09fef2eb22..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-LOAD-WARNINGS", - "language": "en" -} ---- - - - -## SHOW-LOAD-WARNINGS - -### Description - -### Example - -### Keywords - - SHOW, LOAD, WARNINGS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md deleted file mode 100644 index 06b5897243..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-LOAD", - "language": "en" -} ---- - - - -## SHOW-LOAD - -### Description - -### Example - -### Keywords - - SHOW, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md deleted file mode 100644 index 4d32fd4c0f..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-MIGRATIONS", - "language": "en" -} ---- - - - -## SHOW-MIGRATIONS - -### Description - -### Example - -### Keywords - - SHOW, MIGRATIONS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md deleted file mode 100644 index 3fd215f958..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PARTITION-ID", - "language": "en" -} ---- - - - -## SHOW-PARTITION-ID - -### Description - -### Example - -### Keywords - - SHOW, PARTITION, ID - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md deleted file mode 100644 index d5b0695463..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PARTITIONS", - "language": "en" -} ---- - - - -## SHOW-PARTITIONS - -### Description - -### Example - -### Keywords - - SHOW, PARTITIONS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md deleted file mode 100644 index d96ce805fd..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PLUGINS", - "language": "en" -} ---- - - - -## SHOW-PLUGINS - -### Description - -### Example - -### Keywords - - SHOW, PLUGINS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md deleted file mode 100644 index 1e85c7b3e0..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PROC", - "language": "en" -} ---- - - - -## SHOW-PROC - -### Description - -### Example - -### Keywords - - SHOW, PROC - -### Best 
Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md deleted file mode 100644 index c587c732c9..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PROCESSLIST", - "language": "en" -} ---- - - - -## SHOW-PROCESSLIST - -### Description - -### Example - -### Keywords - - SHOW, PROCESSLIST - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md deleted file mode 100644 index a89000dedf..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -{ - "title": "SHOW-PROPERTY", - "language": "zh-CN" -} ---- - - - -## SHOW-PROPERTY - -### Description - -This statement is used to view the attributes of the user - -``` -SHOW PROPERTY [FOR user] [LIKE key]; -``` - -* `user` - - View the attributes of the specified user. If not specified, check the current user's. - -* `LIKE` - - Fuzzy matching can be done by attribute name. - -Return result description: - -```sql -mysql> show property like'%connection%'; -+----------------------+-------+ -| Key | Value | -+----------------------+-------+ -| max_user_connections | 100 | -+----------------------+-------+ -1 row in set (0.01 sec) -``` - -* `Key` - - Property name. - -* `Value` - - Attribute value. - -### Example - -1. View the attributes of the jack user - - ```sql - SHOW PROPERTY FOR'jack'; - ``` - -2. View the attribute of jack user connection limit - - ```sql - SHOW PROPERTY FOR'jack' LIKE'%connection%'; - ``` - -### Keywords - - SHOW, PROPERTY - -### Best Practice diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md deleted file mode 100644 index d037fe4800..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-REPOSITORIES", - "language": "en" -} ---- - - - -## SHOW-REPOSITORIES - -### Description - -### Example - -### Keywords - - SHOW, REPOSITORIES - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md deleted file mode 100644 index 68d642ccc8..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-RESOURCES", - "language": "en" -} ---- - - - -## SHOW-RESOURCES - -### Description - -### Example - -### Keywords - - SHOW, RESOURCES - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md deleted file mode 100644 index 5d5f631c3c..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-RESTORE", - "language": "en" -} ---- - - - -## SHOW-RESTORE - -### Description - -### Example - -### Keywords - - SHOW, RESTORE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md deleted file mode 
100644 index e982e89eb0..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ROLES", - "language": "en" -} ---- - - - -## SHOW-ROLES - -### Description - -### Example - -### Keywords - - SHOW, ROLES - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md deleted file mode 100644 index 481c8ecd63..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ROUTINE-LOAD-TASK", - "language": "en" -} ---- - - - -## SHOW-ROUTINE-LOAD-TASK - -### Description - -### Example - -### Keywords - - SHOW, ROUTINE, LOAD, TASK - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md deleted file mode 100644 index fe0ab3a1c5..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ROUTINE-LOAD", - "language": "en" -} ---- - - - -## SHOW-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - SHOW, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md deleted file mode 100644 index 214c108aec..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-SMALL-FILES", - "language": "en" -} ---- - - - -## SHOW-SMALL-FILES - -### Description - -### Example - -### Keywords - - SHOW, SMALL, FILES - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md deleted file mode 100644 index 048daac9be..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-SNAPSHOT", - "language": "en" -} ---- - - - -## SHOW-SNAPSHOT - -### Description - -### Example - -### Keywords - - SHOW, SNAPSHOT - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md deleted file mode 100644 index 9d26596d1f..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-STATUS", - "language": "en" -} ---- - - - -## SHOW-STATUS - -### Description - -### Example - -### Keywords - - SHOW, STATUS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md deleted file mode 100644 index 840d3e00c5..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-STREAM-LOAD", - "language": "en" -} ---- - - - -## SHOW-STREAM-LOAD - -### Description - -### Example - -### Keywords - - SHOW, STREAM, LOAD - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md 
b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md deleted file mode 100644 index 015bbfd325..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-TABLE-ID", - "language": "en" -} ---- - - - -## SHOW-TABLE-ID - -### Description - -### Example - -### Keywords - - SHOW, TABLE, ID - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md deleted file mode 100644 index 9bf2bf5806..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-TABLE-STATUS", - "language": "en" -} ---- - - - -## SHOW-TABLE-STATUS - -### Description - -### Example - -### Keywords - - SHOW, TABLE, STATUS - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md deleted file mode 100644 index 8478411b5b..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-TABLET", - "language": "en" -} ---- - - - -## SHOW-TABLET - -### Description - -### Example - -### Keywords - - SHOW, TABLET - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md deleted file mode 100644 index b04e5dfdba..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-TRANSACTION", - "language": "en" -} ---- - - - -## SHOW-TRANSACTION - -### Description - -### Example - -### Keywords - - SHOW, TRANSACTION - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md deleted file mode 100644 index 6ab1c01086..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-VARIABLES", - "language": "en" -} ---- - - - -## SHOW-VARIABLES - -### Description - -### Example - -### Keywords - - SHOW, VARIABLES - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md b/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md deleted file mode 100644 index e638427cf8..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-VIEW", - "language": "en" -} ---- - - - -## SHOW-VIEW - -### Description - -### Example - -### Keywords - - SHOW, VIEW - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md b/docs/en/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md deleted file mode 100644 index e854bdd6df..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DESCRIBE", - "language": "en" -} ---- - - - -## DESCRIBE - -### Description - -### Example - -### Keywords - - DESCRIBE - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/HELP.md 
b/docs/en/sql-reference-v2/sql-statements/Utility-Statements/HELP.md deleted file mode 100644 index 29a43b975a..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/HELP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "HELP", - "language": "en" -} ---- - - - -## HELP - -### Description - -### Example - -### Keywords - - HELP - -### Best Practice - diff --git a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/USE.md b/docs/en/sql-reference-v2/sql-statements/Utility-Statements/USE.md deleted file mode 100644 index 00ce4dc298..0000000000 --- a/docs/en/sql-reference-v2/sql-statements/Utility-Statements/USE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "USE", - "language": "en" -} ---- - - - -## USE - -### Description - -### Example - -### Keywords - - USE - -### Best Practice - diff --git a/docs/en/sql-reference/sql-statements/Account Management/CREATE ROLE.md b/docs/en/sql-reference/sql-statements/Account Management/CREATE ROLE.md deleted file mode 100644 index 1ee11b440c..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/CREATE ROLE.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "CREATE ROLE", - "language": "en" -} ---- - - - -# CREATE ROLE -## Description -The statement user creates a role - -Grammar: -CREATE ROLE role1; - -This statement creates an unauthorized role that can be subsequently granted permission through the GRANT command. - -## example - -1. Create a role - -CREATE ROLE role1; - -## keyword -CREATE, ROLE - - diff --git a/docs/en/sql-reference/sql-statements/Account Management/CREATE USER.md b/docs/en/sql-reference/sql-statements/Account Management/CREATE USER.md deleted file mode 100644 index 79c72de866..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/CREATE USER.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -{ - "title": "CREATE USER", - "language": "en" -} ---- - - - -# CREATE USER -##Description - -Syntax: - -CREATE USER user_identity [IDENTIFIED BY 'password'] [DEFAULT ROLE 'role_name'] - -user_identity: -'user_name'@'host' - -The CREATE USER command is used to create a Doris user. In Doris, a user_identity uniquely identifies a user. User_identity consists of two parts, user_name and host, where username is the user name. The host identifies the host address where the client connects. The host part can use% for fuzzy matching. If no host is specified, the default is'%', which means that the user can connect to Doris from any host. - -The host part can also be specified as a domain with the grammar:'user_name'@['domain']. Even if surrounded by brackets, Doris will think of it as a domain and try to parse its IP address. At present, it only supports BNS analysis within Baidu. - -If a role (ROLE) is specified, the permissions that the role has are automatically granted to the newly created user. If not specified, the user defaults to having no permissions. The specified ROLE must already exist. - -## example - -1. Create a passwordless user (without specifying host, it is equivalent to Jack @'%') - -CREATE USER 'jack'; - -2. Create a password user that allows login from'172.10.1.10' - -CREATE USER jack@'172.10.1.10' IDENTIFIED BY '123456'; - -3. To avoid passing plaintext, use case 2 can also be created in the following way - -CREATE USER jack@'172.10.1.10' IDENTIFIED BY PASSWORD '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9'; - -Later encrypted content can be obtained through PASSWORD (), for example: - -SELECT PASSWORD('123456'); - -4. 
Create a user who is allowed to log in from the `192.168` subnet and specify its role as example_role - -CREATE USER 'jack'@'192.168.%' DEFAULT ROLE 'example_role'; - -5. Create a user who is allowed to log in from the domain name 'example_domain'. - -CREATE USER 'jack'@['example_domain'] IDENTIFIED BY '12345'; - -6. Create a user and specify a role - -CREATE USER 'jack'@'%' IDENTIFIED BY '12345' DEFAULT ROLE 'my_role'; - -## keyword -CREATE, USER diff --git a/docs/en/sql-reference/sql-statements/Account Management/DROP ROLE.md b/docs/en/sql-reference/sql-statements/Account Management/DROP ROLE.md deleted file mode 100644 index 6966f8241a..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/DROP ROLE.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -{ - "title": "DROP ROLE", - "language": "en" -} ---- - - - -# DROP ROLE -## Description -This statement is used to delete a role - -Grammar: -DROP ROLE role1; - -Deleting a role does not affect the permissions of users who previously belonged to that role. It is only equivalent to decoupling the role from the user. The permissions that the user has obtained from the role will not change. - -## example - -1. Delete a role - -DROP ROLE role1; - -## keyword -DROP, ROLE diff --git a/docs/en/sql-reference/sql-statements/Account Management/DROP USER.md b/docs/en/sql-reference/sql-statements/Account Management/DROP USER.md deleted file mode 100644 index 8860140ddc..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/DROP USER.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "DROP USER", - "language": "en" -} ---- - - - -# DROP USER -## Description - -Syntax: - - DROP USER 'user_identity' - - `user_identity`: - - user@'host' - user@['domain'] - - Drop a specified user identity. - -## example - -1. Delete user jack@'192.%' - - DROP USER 'jack'@'192.%' - -## keyword - - DROP, USER diff --git a/docs/en/sql-reference/sql-statements/Account Management/GRANT.md b/docs/en/sql-reference/sql-statements/Account Management/GRANT.md deleted file mode 100644 index 0d59295d11..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/GRANT.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -{ - "title": "Grant", - "language": "en" -} ---- - - - -# Grant -## Description - -The GRANT command is used to give the specified user or role the specified permissions. - -Syntax: - -GRANT privilege_list ON db_name[.tbl_name] TO user_identity [ROLE role_name] - - -Privilege_list is a list of permissions that need to be granted, separated by commas. Currently Doris supports the following permissions: - -NODE_PRIV: Operational privileges of cluster nodes, including bringing nodes online and taking them offline. Only root users have this privilege and it cannot be given to other users. -ADMIN_PRIV: All rights except NODE_PRIV. -GRANT_PRIV: Permission to manage privileges, including the creation and deletion of users and roles, granting and revoking privileges, password settings and so on. -SELECT_PRIV: Read permissions for specified libraries or tables -LOAD_PRIV: Import permissions for specified libraries or tables -ALTER_PRIV: Schema change permissions for specified libraries or tables -CREATE_PRIV: Creation permissions for specified libraries or tables -DROP_PRIV: Delete permissions for specified libraries or tables - -ALL and READ_WRITE in the old privilege system are converted to: SELECT_PRIV,LOAD_PRIV,ALTER_PRIV,CREATE_PRIV,DROP_PRIV; -READ_ONLY is converted to SELECT_PRIV. - -Db_name [.tbl_name] supports the following three forms: - -1. *. 
* permissions can be applied to all libraries and all tables in them -2. db. * permissions can be applied to all tables under the specified library -3. db.tbl permissions can be applied to specified tables under specified Libraries - -The libraries or tables specified here can be non-existent libraries and tables. - -user_identity: - -The user_identity syntax here is the same as CREATE USER. And you must create user_identity for the user using CREATE USER. The host in user_identity can be a domain name. If it is a domain name, the validity time of permissions may be delayed by about one minute. - -You can also grant permissions to the specified ROLE, which is automatically created if the specified ROLE does not exist. - -## example - -1. Grant permissions to all libraries and tables to users - -GRANT SELECT_PRIV ON *.* TO 'jack'@'%'; - -2. Grant permissions to specified library tables to users - -GRANT SELECT_PRIV,ALTER_PRIV,LOAD_PRIV ON db1.tbl1 TO 'jack'@'192.8.%'; - -3. Grant permissions to specified library tables to roles - -GRANT LOAD_PRIV ON db1.* TO ROLE 'my_role'; - -## keyword -GRANT - diff --git a/docs/en/sql-reference/sql-statements/Account Management/REVOKE.md b/docs/en/sql-reference/sql-statements/Account Management/REVOKE.md deleted file mode 100644 index d619f20ffb..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/REVOKE.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -{ - "title": "REVOKE", - "language": "en" -} ---- - - - -# REVOKE -## Description - -The REVOKE command is used to revoke the rights specified by the specified user or role. -Syntax -REVOKE privilege_list ON db_name[.tbl_name] FROM user_identity [ROLE role_name] - -user_identity: - -The user_identity syntax here is the same as CREATE USER. And you must create user_identity for the user using CREATE USER. The host in user_identity can be a domain name. If it is a domain name, the revocation time of permission may be delayed by about one minute. - -You can also revoke the permission of the specified ROLE, which must exist for execution. - -## example - -1. Revoke the rights of user Jack database testDb - -REVOKE SELECT_PRIV ON db1.* FROM 'jack'@'192.%'; - -## keyword - -REVOKE diff --git a/docs/en/sql-reference/sql-statements/Account Management/SET PASSWORD.md b/docs/en/sql-reference/sql-statements/Account Management/SET PASSWORD.md deleted file mode 100644 index d796e853a9..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/SET PASSWORD.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -{ - "title": "SET PASSWORD", - "language": "en" -} ---- - - - -# SET PASSWORD -## Description - -Syntax: - -SET PASSWORD [FOR user_identity] = -[PASSWORD('plain password')]|['hashed password'] - -The SET PASSWORD command can be used to modify a user's login password. If the [FOR user_identity] field does not exist, modify the password of the current user. - -Note that the user_identity here must match exactly the user_identity specified when creating a user using CREATE USER, otherwise the user will be reported as non-existent. If user_identity is not specified, the current user is 'username'@'ip', which may not match any user_identity. The current user can be viewed through SHOW GRANTS. - -PASSWORD () input is a plaintext password, and direct use of strings, you need to pass the encrypted password. -If you change the password of other users, you need to have administrator privileges. - -## example - -1. 
Modify the password of the current user - -SET PASSWORD = PASSWORD('123456') -SET PASSWORD = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' - -2. Modify the specified user password - -SET PASSWORD FOR 'jack'@'192.%' = PASSWORD('123456') -SET PASSWORD FOR 'jack'@['domain'] = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' - -## keyword -SET, PASSWORD diff --git a/docs/en/sql-reference/sql-statements/Account Management/SET PROPERTY.md b/docs/en/sql-reference/sql-statements/Account Management/SET PROPERTY.md deleted file mode 100644 index 3439e9b137..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/SET PROPERTY.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -{ - "title": "SET PROPERTY", - "language": "en" -} ---- - - - -# SET PROPERTY -## Description - -Syntax: - -SET PROPERTY [FOR 'user'] 'key' = 'value' [, 'key' = 'value'] - -Set user attributes, including resources allocated to users, import cluster, etc. The user attributes set here are for user, not user_identity. That is to say, if two users 'jack'@'%' and 'jack'@'192%'are created through the CREATE USER statement, the SET PROPERTY statement can only be used for the jack user, not 'jack'@'%' or 'jack'@'192%' - -Importing cluster is only applicable to Baidu internal users. - -key: - -Super user rights: -max_user_connections: Maximum number of connections. -max_query_instances: Maximum number of query instance user can use when query. -sql_block_rules: set sql block rules.After setting, if the query user execute match the rules, it will be rejected. -cpu_resource_limit: limit the cpu resource usage of a query. See session variable `cpu_resource_limit`. -exec_mem_limit: Limit the memory usage of the query. See the description of the session variable `exec_mem_limit` for details. -1 means not set. -load_mem_limit: Limit memory usage for imports. See the introduction of the session variable `load_mem_limit` for details. -1 means not set. -resource.cpu_share: cpu resource assignment.(Derepcated) -Load_cluster. {cluster_name}. priority: assigns priority to a specified cluster, which can be HIGH or NORMAL -resource_tags: Specify the user's resource tag permissions. - -> Notice: The `cpu_resource_limit`, `exec_mem_limit`, and `load_mem_limit` properties default to the values in the session variables if they are not set. - -Ordinary user rights: -Quota.normal: Resource allocation at the normal level. -Quota.high: Resource allocation at the high level. -Quota.low: Resource allocation at low level. - -Load_cluster. {cluster_name}. hadoop_palo_path: The Hadoop directory used by Palo needs to store ETL programs and intermediate data generated by ETL for Palo to import. After the import is completed, the intermediate data will be automatically cleaned up, and the ETL program will be automatically reserved for next use. -Load_cluster. {cluster_name}. hadoop_configs: configuration of hadoop, where fs. default. name, mapred. job. tracker, hadoop. job. UGI must be filled in. -Load_cluster. {cluster_name}. hadoop_port: Hadoop HDFS name node http} -Default_load_cluster: The default import cluster. - -## example - -1. Modify the maximum number of user jacks to 1000 -SET PROPERTY FOR 'jack' 'max_user_connections' = '1000'; - -2. Modify the cpu_share of user Jack to 1000 -SET PROPERTY FOR 'jack' 'resource.cpu_share' = '1000'; - -3. Modify the weight of the normal group of Jack users -Set property for 'jack''quota. normal' = 400'; - -4. 
Add import cluster for user jack -SET PROPERTY FOR 'jack' -'load_cluster.{cluster_name}.hadoop_palo_path' = '/user/palo/palo_path', -'load_cluster.{cluster_name}.hadoop_configs' = 'fs.default.name=hdfs://dpp.cluster.com:port;mapred.job.tracker=dpp.cluster.com:port;hadoop.job.ugi=user,password;mapred.job.queue.name=job_queue_name_in_hadoop;mapred.job.priority=HIGH;'; - -5. Delete the import cluster under user jack. -SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}' = ''; - -6. Modify user jack's default import cluster -SET PROPERTY FOR 'jack' 'default_load_cluster' = '{cluster_name}'; - -7. Modify the cluster priority of user Jack to HIGH -SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}.priority' = 'HIGH'; - -8. Modify the maximum number of query instances for jack to 3000 -SET PROPERTY FOR 'jack' 'max_query_instances' = '3000'; - -9. Modify the sql block rules for jack -SET PROPERTY FOR 'jack' 'sql_block_rules' = 'rule1, rule2'; - -10. Modify the cpu resource usage limit for jack -SET PROPERTY FOR 'jack' 'cpu_resource_limit' = '2'; - -11. Modify the user's resource tag permission -SET PROPERTY FOR 'jack' 'resource_tags.location' = 'group_a, group_b'; - -12. Modify the user's query memory usage limit in bytes -SET PROPERTY FOR 'jack' 'exec_mem_limit' = '2147483648'; - -13. Modify the user's import memory usage limit in bytes -SET PROPERTY FOR 'jack' 'load_mem_limit' = '2147483648'; - -## keyword -SET, PROPERTY - diff --git a/docs/en/sql-reference/sql-statements/Account Management/SHOW GRANTS.md b/docs/en/sql-reference/sql-statements/Account Management/SHOW GRANTS.md deleted file mode 100644 index a12ac9bf91..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/SHOW GRANTS.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -{ - "title": "SHOW GRANTS", - "language": "en" -} ---- - - - -# SHOW GRANTS -## Description - -This statement is used to view user rights. - -Grammar: -SHOW [ALL] GRANTS [FOR user_identity]; - -Explain: -1. SHOW ALL GRANTS can view the privileges of all users. -2. If you specify user_identity, view the permissions of the specified user. The user_identity must have been created with the CREATE USER command. -3. If you do not specify user_identity, view the permissions of the current user. - - -## example - -1. View all user rights information - -SHOW ALL GRANTS; - -2. View the permissions of the specified user - -SHOW GRANTS FOR jack@'%'; - -3. View the permissions of the current user - -SHOW GRANTS; - -## keyword -SHOW, GRANTS diff --git a/docs/en/sql-reference/sql-statements/Account Management/SHOW ROLES.md b/docs/en/sql-reference/sql-statements/Account Management/SHOW ROLES.md deleted file mode 100644 index a27d30dbd1..0000000000 --- a/docs/en/sql-reference/sql-statements/Account Management/SHOW ROLES.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -{ - "title": "SHOW ROLES", - "language": "en" -} ---- - - - -# SHOW ROLES -## Description -This statement is used to display all created roles, including the role name, the users it contains, and its permissions. - -Grammar: -SHOW ROLES; - -## example - -1. 
View the created roles: - -SHOW ROLES; - -## keyword -SHOW,ROLES diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md deleted file mode 100644 index 475e266306..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -{ - "title": "ADMIN CANCEL REBALANCE DISK", - "language": "en" -} ---- - - - -# ADMIN CANCEL REBALANCE DISK -## Description - -This statement is used to cancel rebalancing disks of specified backends with high priority - -Grammar: - -ADMIN CANCEL REBALANCE DISK [ON ("BackendHost1:BackendHeartBeatPort1", "BackendHost2:BackendHeartBeatPort2", ...)]; - -Explain: - -1. This statement only indicates that the system no longer rebalance disks of specified backends with high priority. The system will still rebalance disks by default scheduling. - -## example - -1. Cancel High Priority Disk Rebalance of all of backends of the cluster - -ADMIN CANCEL REBALANCE DISK; - -2. Cancel High Priority Disk Rebalance of specified backends - -ADMIN CANCEL REBALANCE DISK ON ("192.168.1.1:1234", "192.168.1.2:1234"); - -## keyword -ADMIN,CANCEL,REBALANCE DISK diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md deleted file mode 100644 index db001c90b3..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -{ - "title": "ADMIN CANCEL REPAIR", - "language": "en" -} ---- - - - -# ADMIN CANCEL REPAIR -## Description - -This statement is used to cancel repairing a specified table or partition with high priority - -Grammar: - -ADMIN CANCEL REPAIR TABLE table_name[ PARTITION (p1,...)]; - -Explain: - -1. This statement only indicates that the system no longer repairs fragmented copies of specified tables or partitions with high priority. The system will still repair the copy by default scheduling. - -## example - -1. Cancel High Priority Repair - -ADMIN CANCEL REPAIR TABLE tbl PARTITION(p1); - -## keyword -ADMIN,CANCEL,REPAIR diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md deleted file mode 100644 index 101d506289..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -{ - "title": "ADMIN CHECK TABLET", - "language": "en" -} ---- - - - -# ADMIN CHECK TABLET -## description - -This statement is used to perform a specified check operation on a list of tablets. - -Syntax: - -``` -ADMIN CHECK TABLE (tablet_id1, tablet_id2, ...) -PROPERTIES("type" = "..."); -``` - -Note: - -1. You must specify the list of tablet ids and the "type" property in PROPERTIES. -2. Currently "type" only supports: - - * consistency: Check the consistency of the replicas of the tablet. This command is asynchronous. After sending, Doris will start to perform the consistency check job of the corresponding tablet. The final result will be reflected in the "InconsistentTabletNum" column in the result of `SHOW PROC" / statistic "; - -## example - -1. 
Perform a replica consistency check on a specified set of tablets - - ``` - ADMIN CHECK TABLET (10000, 10001) - PROPERTIES("type" = "consistency"); - ``` - -## keyword - - ADMIN,CHECK,TABLET diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md deleted file mode 100644 index 0511a3e47b..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -{ - "title": "ADMIN CLEAN TRASH", - "language": "en" -} ---- - - - -# ADMIN CLEAN TRASH -## description - This statement is used to clean up the trash data in the backend. - Grammar: - ADMIN CLEAN TRASH [ON ("BackendHost1:BackendHeartBeatPort1", "BackendHost2:BackendHeartBeatPort2", ...)]; - - Explain: - Take BackendHost:BackendHeartBeatPort to indicate the backend that needs to be cleaned up, and clean up all backends without adding the on limit. - -## example - - 1. Clean up the trash data of all be nodes. - - ADMIN CLEAN TRASH; - - 2. Clean up the trash data of '192.168.0.1:9050' and '192.168.0.2:9050'. - - ADMIN CLEAN TRASH ON ("192.168.0.1:9050","192.168.0.2:9050"); - -## keyword - ADMIN, CLEAN, TRASH diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN COMPACT.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN COMPACT.md deleted file mode 100644 index f460d6e016..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN COMPACT.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -{ - "title": "ADMIN COMPACT", - "language": "en" -} ---- - - - -# ADMIN COMPACT -## Description - - This statement is used to trigger compaction for all replicas of a specified partition - - Grammar: - - ADMIN COMPACT TABLE table_name PARTITION partition_name WHERE TYPE='BASE/CUMULATIVE' - - Explain: - - 1. This statement only means that the system attempts to submit a compaction task for each replica under the specified partition to compaction thread pool, and it is not guaranteed to be successful. - 2. This statement supports executing compaction task for a single partition of the table at a time. - -## example - - 1. Attempt to trigger cumulative compaction for all replicas under the specified partition - - ADMIN COMPACT TABLE tbl PARTITION par01 WHERE TYPE='CUMULATIVE'; - - 2. Attempt to trigger base compaction for all replicas under the specified partition - - ADMIN COMPACT TABLE tbl PARTITION par01 WHERE TYPE='BASE'; - -## keyword - ADMIN,COMPACT diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md deleted file mode 100644 index 6e1c1aaa34..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -{ - "title": "ADMIN REBALANCE DISK", - "language": "en" -} ---- - - - -# ADMIN REBALANCE DISK -## Description - -This statement is used to try to rebalance disks of the specified backends first, no matter if the cluster is balanced - -Grammar: - -ADMIN REBALANCE DISK [ON ("BackendHost1:BackendHeartBeatPort1", "BackendHost2:BackendHeartBeatPort2", ...)]; - -Explain: - -1. This statement only means that the system attempts to rebalance disks of specified backends with high priority, no matter if the cluster is balanced. -2. The default timeout is 24 hours. Timeout means that the system will no longer rebalance disks of specified backends with high priority. 
The command settings need to be reused. - -## example - -1. Attempt to rebalance disks of all backends - -ADMIN REBALANCE DISK; - -2. Attempt to rebalance disks of the specified backends - -ADMIN REBALANCE DISK ON ("192.168.1.1:1234", "192.168.1.2:1234"); - -## keyword -ADMIN,REBALANCE,DISK diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN REPAIR.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN REPAIR.md deleted file mode 100644 index 8cb037be65..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN REPAIR.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -{ - "title": "ADMIN REPAIR", - "language": "en" -} ---- - - - -# ADMIN REPAIR -## Description - -This statement is used to try to repair the specified table or partition with high priority - -Grammar: - -ADMIN REPAIR TABLE table_name[ PARTITION (p1,...)] - -Explain: - -1. This statement only means that the system attempts to repair tablet replicas of the specified table or partition with high priority, and it is not guaranteed to be successful. Users can view the repair status through the ADMIN SHOW REPLICA STATUS command. -2. The default timeout is 14400 seconds (4 hours). Timeout means that the system will no longer repair tablet replicas of the specified tables or partitions with high priority. The command settings need to be reused. - -## example - -1. Attempt to fix the specified table - -ADMIN REPAIR TABLE tbl1; - -2. Attempt to fix the specified partition - -ADMIN REPAIR TABLE tbl1 PARTITION (p1, p2); - -## keyword -ADMIN,REPAIR diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md deleted file mode 100644 index ad87db2739..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "ADMIN SET CONFIG", - "language": "en" -} ---- - - - -# ADMIN SET CONFIG -## Description - -This statement is used to set the configuration items for the cluster (currently only the configuration items for setting FE are supported). -Settable configuration items can be viewed through the `ADMIN SHOW FRONTEND CONFIG;` command. - -Grammar: - -ADMIN SET FRONTEND CONFIG ("key" = "value"); - -## example - -1. Set "disable_balance" to "true" - -ADMIN SET FRONTEND CONFIG ("disable_balance" = "true"); - -## keyword -ADMIN,SET,CONFIG diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md deleted file mode 100644 index 03764b53d2..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -{ - "title": "ADMIN SET REPLICA STATUS", - "language": "en" -} ---- - - - -# ADMIN SET REPLICA STATUS -## description - - This command is used to set the status of the specified replica. - This command is currently only used to manually set the status of some replicas to BAD or OK, allowing the system to automatically repair these replicas. - - Syntax: - - ADMIN SET REPLICA STATUS - PROPERTIES ("key" = "value", ...); - - The following attributes are currently supported: - "tablet_id": required. Specify a Tablet Id. - "backend_id": required. Specify a Backend Id. - "status": required. Specify the status. Only "bad" and "ok" are currently supported. - - If the specified replica does not exist or the status is already bad or ok, it will be ignored. 
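A minimal combined-usage sketch, assuming a table named db1.tbl1 and reusing the illustrative tablet/backend ids 10003 and 10001 from the examples below: unhealthy replicas are typically located first with ADMIN SHOW REPLICA STATUS (documented later in this patch), and a specific replica is then marked bad so the system schedules an automatic repair for it.

```sql
-- Locate replicas that are not healthy (statuses such as DEAD, VERSION_ERROR, MISSING).
ADMIN SHOW REPLICA STATUS FROM db1.tbl1 WHERE STATUS != "OK";

-- Manually mark one replica as bad so the system re-replicates it automatically.
-- Use with caution: a replica marked bad may be dropped immediately.
ADMIN SET REPLICA STATUS
PROPERTIES("tablet_id" = "10003", "backend_id" = "10001", "status" = "bad");
```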
- - Notice: - - Replica set to Bad status may be dropped immediately, please proceed with caution. - -## example - - 1. Set the replica status of tablet 10003 on BE 10001 to bad. - - ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10003", "backend_id" = "10001", "status" = "bad"); - - 2. Set the replica status of tablet 10003 on BE 10001 to ok. - - ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10003", "backend_id" = "10001", "status" = "ok"); - -## keyword - - ADMIN,SET,REPLICA,STATUS - diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md deleted file mode 100644 index 3be8ca88cf..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -{ - "title": "ADMIN SHOW CONFIG", - "language": "en" -} ---- - - - -# ADMIN SHOW CONFIG -## Description - -This statement is used to show the configuration of the current cluster (currently only the display of FE configuration items is supported) - -Grammar: - -ADMIN SHOW FRONTEND CONFIG [LIKE "pattern"]; - -Explain: - -The meanings of the result columns are as follows: -1. Key: Configuration item name -2. Value: Configuration item value -3. Type: Configuration item type -4. IsMutable: Whether the item can be modified via the ADMIN SET CONFIG command -5. MasterOnly: Whether the item only applies to the Master FE -6. Comment: Configuration item description - -## example - -1. View the configuration of the current FE node - -ADMIN SHOW FRONTEND CONFIG; - -2. Search the configuration of the current FE node with a LIKE predicate - -mysql> ADMIN SHOW FRONTEND CONFIG LIKE '%check_java_version%'; -+--------------------+-------+---------+-----------+------------+---------+ -| Key | Value | Type | IsMutable | MasterOnly | Comment | -+--------------------+-------+---------+-----------+------------+---------+ -| check_java_version | true | boolean | false | false | | -+--------------------+-------+---------+-----------+------------+---------+ -1 row in set (0.00 sec) - -## keyword -ADMIN,SHOW,CONFIG diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md deleted file mode 100644 index b521ee0889..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -{ - "title": "ADMIN SHOW REPLICA DISTRIBUTION", - "language": "en" -} ---- - - - -# ADMIN SHOW REPLICA DISTRIBUTION -## Description - -This statement is used to show the replica distribution status of a table or partition - -Grammar: - -ADMIN SHOW REPLICA DISTRIBUTION FROM [db_name.]tbl_name [PARTITION (p1, ...)]; - -Explain: - -The Graph column in the result shows the distribution ratio of replicas graphically - -## example - -1. View the replica distribution of a table - -ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1; - -2.
View the replica distribution of specified partitions in a table - -ADMIN SHOW REPLICA DISTRIBUTION FROM db1.tbl1 PARTITION(p1, p2); - -## keyword -ADMIN,SHOW,REPLICA,DISTRIBUTION diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md deleted file mode 100644 index 26486e9f5c..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -{ - "title": "ADMIN SHOW REPLICA STATUS", - "language": "en" -} ---- - - - -# ADMIN SHOW REPLICA STATUS -## Description - -This statement is used to display replica status information for a table or partition - -Grammar: - -ADMIN SHOW REPLICA STATUS FROM [db_name.]tbl_name [PARTITION (p1,...)] -[where_clause]; - -where_clause: -WHERE STATUS [!]= "replica_status" - -Replica status: -OK: The replica is healthy -DEAD: The Backend of the replica is not available -VERSION_ERROR: The replica data version is missing -SCHEMA_ERROR: The schema hash of the replica is incorrect -MISSING: The replica does not exist - -## example - -1. View the status of all replicas of a table - -ADMIN SHOW REPLICA STATUS FROM db1.tbl1; - -2. View replicas of the specified partitions whose status is VERSION_ERROR - -ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2) -WHERE STATUS = "VERSION_ERROR"; - - -3. View all unhealthy replicas of a table - -ADMIN SHOW REPLICA STATUS FROM tbl1 -WHERE STATUS != "OK"; - -## keyword -ADMIN,SHOW,REPLICA,STATUS diff --git a/docs/en/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md b/docs/en/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md deleted file mode 100644 index e41bf98c53..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -{ - "title": "ADMIN DIAGNOSE TABLET", - "language": "en" -} ---- - - - -# ADMIN DIAGNOSE TABLET -## description - - This statement is used to diagnose the specified tablet. The results will show information about the tablet and some potential problems. - - grammar: - - ADMIN DIAGNOSE TABLET tablet_id - - Explain: - - The lines of information in the result are as follows: - 1. TabletExist: Whether the Tablet exists - 2. TabletId: Tablet ID - 3. Database: The DB to which the Tablet belongs and its ID - 4. Table: The Table to which the Tablet belongs and its ID - 5. Partition: The Partition to which the Tablet belongs and its ID - 6. MaterializedIndex: The materialized view to which the Tablet belongs and its ID - 7. Replicas(ReplicaId -> BackendId): Tablet replicas and their BEs. - 8. ReplicasNum: Whether the number of replicas is correct. - 9. ReplicaBackendStatus: Whether the BE node where the replica is located is normal. - 10. ReplicaVersionStatus: Whether the version number of the replica is normal. - 11. ReplicaStatus: Whether the replica status is normal. - 12. ReplicaCompactionStatus: Whether the replica compaction status is normal. - -## example - - 1.
Diagnose tablet 10001 - - ADMIN DIAGNOSE TABLET 10001; - -## keyword - ADMIN,DIAGNOSE,TABLET diff --git a/docs/en/sql-reference/sql-statements/Administration/ALTER CLUSTER.md b/docs/en/sql-reference/sql-statements/Administration/ALTER CLUSTER.md deleted file mode 100644 index 12643abd7d..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ALTER CLUSTER.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "ALTER CLUSTER", - "language": "en" -} ---- - - - -# ALTER CLUSTER -## description - -This statement is used to update the logical cluster. Administrator privileges are required - -grammar - -ALTER CLUSTER cluster_name PROPERTIES ("key"="value", ...); - -1. Scaling, scaling (according to the number of be existing in the cluster, large is scaling, small is scaling), scaling for synchronous operation, scaling for asynchronous operation, through the state of backend can be known whether the scaling is completed. - -## example - -1. Reduce the number of be of logical cluster test_cluster containing 3 be by 2. - -ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="2"); - -2. Expansion, increase the number of be of logical cluster test_cluster containing 3 be to 4 - -ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="4"); - -## keyword -ALTER,CLUSTER diff --git a/docs/en/sql-reference/sql-statements/Administration/ALTER SYSTEM.md b/docs/en/sql-reference/sql-statements/Administration/ALTER SYSTEM.md deleted file mode 100644 index 7c3d4902a4..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ALTER SYSTEM.md +++ /dev/null @@ -1,141 +0,0 @@ ---- -{ - "title": "ALTER SYSTEM", - "language": "en" -} ---- - - - -# ALTER SYSTEM - -## Description - - This statement is used to operate on nodes in a system. (Administrator only!) - - Syntax: - 1) Adding nodes (without multi-tenant functionality, add in this way) - ALTER SYSTEM ADD BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 2) Adding idle nodes (that is, adding BACKEND that does not belong to any cluster) - ALTER SYSTEM ADD FREE BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 3) Adding nodes to a cluster - ALTER SYSTEM ADD BACKEND TO cluster_name "host:heartbeat_port"[,"host:heartbeat_port"...]; - 4) Delete nodes - ALTER SYSTEM DROP BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 5) Node offline - ALTER SYSTEM DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 6) Add Broker - ALTER SYSTEM ADD BROKER broker_name "host:port"[,"host:port"...]; - 7) Drop Broker - ALTER SYSTEM DROP BROKER broker_name "host:port"[,"host:port"...]; - 8) Delete all Brokers - ALTER SYSTEM DROP ALL BROKER broker_name - 9) Set up a Load error hub for centralized display of import error information - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES ("key" = "value"[, ...]); - 10) Modify property of BE - ALTER SYSTEM MODIFY BACKEND "host:heartbeat_port" SET ("key" = "value"[, ...]); - - Explain: - 1) Host can be hostname or IP address - 2) heartbeat_port is the heartbeat port of the node - 3) Adding and deleting nodes are synchronous operations. These two operations do not take into account the existing data on the node, the node is directly deleted from the metadata, please use cautiously. - 4) Node offline operations are used to secure offline nodes. This operation is asynchronous. If successful, the node will eventually be removed from the metadata. If it fails, the offline will not be completed. - 5) The offline operation of the node can be cancelled manually. 
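For example, an in-progress offline operation can be reverted as follows. This is only a sketch; the backend host and heartbeat port are placeholders:

```
-- Start taking a backend offline safely (asynchronous).
ALTER SYSTEM DECOMMISSION BACKEND "host1:9050";

-- Cancel the offline operation before it completes.
CANCEL DECOMMISSION BACKEND "host1:9050";
```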
See CANCEL DECOMMISSION for details - 6) Load error hub: - Currently, two types of Hub are supported: Mysql and Broker. You need to specify "type" = "mysql" or "type" = "broker" in PROPERTIES. - If you need to delete the current load error hub, you can set type to null. - 1) When using the Mysql type, the error information generated when importing will be inserted into the specified MySQL table, and the error information can then be viewed directly through the show load warnings statement. - - Hub of Mysql type needs to specify the following parameters: - host: mysql host - port: mysql port - user: mysql user - password: mysql password - database: mysql database - table: mysql table - - 2) When the Broker type is used, the error information generated when importing is written as a file to the designated remote storage system through the broker. Make sure that the corresponding broker is deployed. - Hub of Broker type needs to specify the following parameters: - broker: Name of the broker - path: Remote storage path - Other properties: Other information necessary to access remote storage, such as authentication information. - - 7) Modifying BE node attributes currently supports the following attributes: - 1. tag.location: Resource tag - 2. disable_query: Query disabled attribute - 3. disable_load: Load disabled attribute - -## Example - - 1. Add a node - ALTER SYSTEM ADD BACKEND "host:port"; - - 2. Add an idle node - ALTER SYSTEM ADD FREE BACKEND "host:port"; - - 3. Delete two nodes - ALTER SYSTEM DROP BACKEND "host1:port", "host2:port"; - - 4. Offline two nodes - ALTER SYSTEM DECOMMISSION BACKEND "host1:port", "host2:port"; - - 5. Add two HDFS Brokers - ALTER SYSTEM ADD BROKER hdfs "host1:port", "host2:port"; - - 6. Add a load error hub of Mysql type - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES - ("type"= "mysql", - "host" = "192.168.1.17", - "port" = "3306", - "user" = "my_name", - "password" = "my_passwd", - "database" = "doris_load", - "table" = "load_errors" - ); - - 7. Add a load error hub of Broker type - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES - ("type"= "broker", - "name" = "bos", - "path" = "bos://backup-cmy/logs", - "bos_endpoint" ="http://gz.bcebos.com", - "bos_accesskey" = "069fc278xxxxxx24ddb522", - "bos_secret_accesskey"="700adb0c6xxxxxx74d59eaa980a" - ); - - 8. Delete the current load error hub - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES - ("type"= "null"); - - 9. Modify the BE resource tag - - ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("tag.location" = "group_a"); - - 10. Modify the query disabled attribute of BE - - ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("disable_query" = "true"); - - 11. Modify the load disabled attribute of BE - - ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("disable_load" = "true"); - -## keyword - - ALTER, SYSTEM, BACKEND, BROKER, FREE diff --git a/docs/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md b/docs/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md deleted file mode 100644 index 6965e6b80d..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -{ - "title": "CANCEL DECOMMISSION", - "language": "en" -} ---- - - - -# CANCEL DECOMMISSION -## Description - -This statement is used to undo a node's offline operation. (Administrator only!) -Grammar: -CANCEL DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - -## example - -1.
Cancel the offline operation of two nodes: -CANCEL DECOMMISSION BACKEND "host1:port", "host2:port"; - -## keyword -CANCEL,DECOMMISSION,BACKEND diff --git a/docs/en/sql-reference/sql-statements/Administration/CREATE CLUSTER.md b/docs/en/sql-reference/sql-statements/Administration/CREATE CLUSTER.md deleted file mode 100644 index 440939dce0..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/CREATE CLUSTER.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -{ - "title": "CREATE CLUSTER", - "language": "en" -} ---- - - - -# CREATE CLUSTER -## Description - -This statement is used to create a new logical cluster, requiring administrator privileges. If you don't use multiple tenants, create a cluster named default_cluster directly. Otherwise, create a cluster with a custom name. - -grammar - -CREATE CLUSTER [IF NOT EXISTS] cluster_name - -PROPERTIES ("key"="value", ...) - -IDENTIFIED BY 'password' - -1. PROPERTIES - -Specify the attributes of the logical cluster - -PROPERTIES ("instance_num" = "3") - - -2. IDENTIFIED BY 'password': each logical cluster contains a superuser, whose password must be specified when the logical cluster is created - -## example - -1. Create a new test_cluster with three be nodes and specify its superuser password - -CREATE CLUSTER test_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test'; - -2. Create a new default_cluster with three be nodes (no multi-tenant is used) and specify its superuser password - -CREATE CLUSTER default_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test'; - -## keyword -CREATE,CLUSTER diff --git a/docs/en/sql-reference/sql-statements/Administration/CREATE FILE.md b/docs/en/sql-reference/sql-statements/Administration/CREATE FILE.md deleted file mode 100644 index a7f13f30b9..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/CREATE FILE.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -{ - "title": "CREATE FILE", - "language": "en" -} ---- - - - -# CREATE FILE -## Description - -This statement is used to create and upload a file to the Doris cluster. -This function is usually used to manage files that need to be used in some other commands, such as certificates, public keys, private keys, etc. - -This command can be executed only by users with admin privileges. -A file belongs to a database. A file can be used by any user who has access to the database. - -The size of a single file is limited to 1MB. -A Doris cluster can hold up to 100 uploaded files. - -Grammar: - -CREATE FILE "file_name" [IN database] -[properties] - -Explain: -file_name: Custom file name. -database: The database to which the file belongs; if not specified, the DB of the current session is used. -properties supports the following parameters: - -url: Required. Specifies the download path of the file. Currently only unauthenticated HTTP download paths are supported. Once the command succeeds, the file is saved in Doris and the URL is no longer required. -catalog: Required. The classification name of the file, which can be customized. Some commands look up files under a specific catalog. For example, in a routine load whose data source is kafka, the files under the catalog named kafka are looked up. -md5: Optional. MD5 of the file. If specified, it will be checked after the file is downloaded. - -## example - -1. Create a file ca.pem, categorized as kafka - -CREATE FILE "ca.pem" -PROPERTIES -( -"url" = "https://test.bj.bcebos.com/kafka-key/ca.pem", -"catalog" = "kafka" -); - -2. Create the file client.key, categorized as my_catalog
- -CREATE FILE "client.key" -IN my_database -PROPERTIES -( -"url" = "https://test.bj.bcebos.com/kafka-key/client.key", -"catalog" = "my_catalog", -"md5" = "b5bb901bf1099205b39a46ac3557dd9" -); - -## keyword -CREATE,FILE diff --git a/docs/en/sql-reference/sql-statements/Administration/DROP CLUSTER.md b/docs/en/sql-reference/sql-statements/Administration/DROP CLUSTER.md deleted file mode 100644 index 4330caa93d..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/DROP CLUSTER.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -{ - "title": "DROP CLUSTER", - "language": "en" -} ---- - - - -# DROP CLUSTER -## Description - -This statement is used to delete a logical cluster. Before a logical cluster can be deleted, all databases in it must be deleted first; administrator privileges are required. - -grammar - -DROP CLUSTER [IF EXISTS] cluster_name - -## example - -Delete the logical cluster test_cluster - -DROP CLUSTER test_cluster; - -## keyword -DROP,CLUSTER diff --git a/docs/en/sql-reference/sql-statements/Administration/DROP FILE.md b/docs/en/sql-reference/sql-statements/Administration/DROP FILE.md deleted file mode 100644 index 4d259fa980..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/DROP FILE.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -{ - "title": "DROP FILE", - "language": "en" -} ---- - - - -# DROP FILE -## Description - -This statement is used to delete an uploaded file. - -Grammar: - -DROP FILE "file_name" [FROM database] -[properties] - -Explain: -file_name: File name. -database: The database to which the file belongs; if not specified, the DB of the current session is used. -properties supports the following parameters: - -catalog: Required. The category of the file. - -## example - -1. Delete the file ca.pem - -DROP FILE "ca.pem" properties("catalog" = "kafka"); - -## keyword -DROP,FILE diff --git a/docs/en/sql-reference/sql-statements/Administration/ENTER.md b/docs/en/sql-reference/sql-statements/Administration/ENTER.md deleted file mode 100644 index 22b3a10c1d..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/ENTER.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "ENTER", - "language": "en" -} ---- - - - -# ENTER -## Description - -This statement is used to enter a logical cluster. All users and databases must be created inside a logical cluster and belong to that logical cluster after creation. Administrator privileges are required. - -ENTER cluster_name - -## example - -1. Enter the logical cluster test_cluster - -ENTER test_cluster; - -## keyword -ENTER - diff --git a/docs/en/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md b/docs/en/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md deleted file mode 100644 index 71bc9f715e..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -{ - "title": "INSTALL PLUGIN", - "language": "en" -} ---- - - - -# INSTALL PLUGIN -## description - - To install a plugin - - Syntax - - INSTALL PLUGIN FROM [source] - - source supports 3 kinds: - - 1. Point to a zip file with absolute path. - 2. Point to a plugin dir with absolute path. - 3. Point to a http/https download link of zip file. - - PROPERTIES supports setting some configurations of the plugin, such as setting the md5sum value of the zip file. - -## example - - 1. Install a plugin with a local zip file: - - INSTALL PLUGIN FROM "/home/users/doris/auditdemo.zip"; - - 2. Install a plugin with a local dir: - - INSTALL PLUGIN FROM "/home/users/doris/auditdemo/"; - - 3.
Download and install a plugin: - - INSTALL PLUGIN FROM "http://mywebsite.com/plugin.zip"; - - 4. Download and install a plugin, and set the md5sum value of the zip file: - - INSTALL PLUGIN FROM "http://mywebsite.com/plugin.zip" PROPERTIES("md5sum" = "73877f6029216f4314d712086a146570"); - -## keyword - INSTALL,PLUGIN diff --git a/docs/en/sql-reference/sql-statements/Administration/LINK DATABASE.md b/docs/en/sql-reference/sql-statements/Administration/LINK DATABASE.md deleted file mode 100644 index cf5b87ddbd..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/LINK DATABASE.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "LINK DATABASE", - "language": "en" -} ---- - - - -# LINK DATABASE -## Description - -This statement allows users to link a database of one logical cluster to another logical cluster. A database is only allowed to be linked once at a time. Deleting the linked database does not delete its data, and a database that is being linked cannot be dropped. Administrator privileges are required. - -grammar - -LINK DATABASE src_cluster_name.src_db_name dest_cluster_name.dest_db_name - -## example - -1. Link test_db in test_clusterA to test_clusterB and name it link_test_db - -LINK DATABASE test_clusterA.test_db test_clusterB.link_test_db; - -2. Delete the linked database link_test_db - -DROP DATABASE link_test_db; - -## keyword -LINK,DATABASE diff --git a/docs/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md b/docs/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md deleted file mode 100644 index bd6d1299a1..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "MIGRATE DATABASE", - "language": "en" -} ---- - - - -# MIGRATE DATABASE -## Description - -This statement is used to migrate a database from one logical cluster to another. Before performing this operation, the database must be in the linked state. Administrator privileges are required. - -grammar - -MIGRATE DATABASE src_cluster_name.src_db_name dest_cluster_name.dest_db_name - -## example - -1. Migrate test_db in test_clusterA to test_clusterB - -MIGRATE DATABASE test_clusterA.test_db test_clusterB.link_test_db; - -## keyword -MIGRATE,DATABASE diff --git a/docs/en/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md b/docs/en/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md deleted file mode 100644 index 30ba218490..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "SET LDAP_ADMIN_PASSWORD", - "language": "en" -} ---- - - - -# SET LDAP_ADMIN_PASSWORD -## description - -Syntax: - - SET LDAP_ADMIN_PASSWORD = 'plain password' - - The SET command is used to set the LDAP administrator password. When using LDAP authentication, Doris needs to use the administrator account and password to query the LDAP service for information about the user who is logging in. - -## example - -1.
Set LDAP admin password: -``` -SET LDAP_ADMIN_PASSWORD = '123456' -``` - -## keyword - SET, LDAP, LDAP_ADMIN_PASSWORD - diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW BACKENDS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW BACKENDS.md deleted file mode 100644 index e89adc9404..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW BACKENDS.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -{ - "title": "SHOW BACKENDS", - "language": "en" -} ---- - - - -# SHOW BACKENDS -## Description -This statement is used to view BE nodes in the cluster -Grammar: -SHOW BACKENDS; - -Explain: -1. LastStartTime indicates the last BE start-up time. -2. LastHeartbeat represents the latest heartbeat. -3. Alive indicates whether the node survives. -4. System Decommissioned is true to indicate that the node is safely offline. -5. Cluster Decommissioned is true to indicate that the node is rushing downline in the current cluster. -6. TabletNum represents the number of fragments on the node. -7. Data Used Capacity represents the space occupied by the actual user data. -8. Avail Capacity represents the available space on the disk. -9. Total Capacity represents total disk space. Total Capacity = AvailCapacity + DataUsedCapacity + other non-user data files take up space. -10. UsedPct represents the percentage of disk usage. -11. ErrMsg is used to display error messages when a heartbeat fails. -12. Status is used to display some Status information about BE in JSON format, including the last time that BE reported it's tablet. - -## keyword -SHOW, BACKENDS diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW BROKER.md b/docs/en/sql-reference/sql-statements/Administration/SHOW BROKER.md deleted file mode 100644 index 20d8238840..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW BROKER.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -{ - "title": "SHOW BROKER", - "language": "en" -} ---- - - - -# SHOW BROKER -## Description -This statement is used to view the existing broker -Grammar: -SHOW BROKER; - -Explain: -1. LastStartTime indicates the last BE start-up time. -2. LastHeartbeat represents the latest heartbeat. -3. Alive indicates whether the node survives. -4. ErrMsg is used to display error messages when the heartbeat fails. - -## keyword -SHOW, BROKER diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW FILE.md b/docs/en/sql-reference/sql-statements/Administration/SHOW FILE.md deleted file mode 100644 index 0fabe4e9ed..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW FILE.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -{ - "title": "SHOW FILE", - "language": "en" -} ---- - - - -# SHOW FILE -## Description - -This statement is used to show a file created in a database - -Grammar: - -SHOW FILE [FROM database]; - -Explain: - -FileId: File ID, globally unique -DbName: The name of the database to which it belongs -Catalog: Custom Categories -FileName: File name -FileSize: File size, unit byte -MD5: Document MD5 - -## example - -1. 
View uploaded files in my_database - -SHOW FILE FROM my_database; - -## keyword -SHOW,FILE diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md deleted file mode 100644 index 097829522e..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -{ - "title": "SHOW FRONTENDS", - "language": "en" -} ---- - - - -# SHOW FRONTENDS -## Description -This statement is used to view FE nodes -Grammar: -SHOW FRONTENDS; - -Explain: -1. name denotes the name of the FE node in bdbje. -2. Join is true to indicate that the node has joined the cluster. But it doesn't mean that it's still in the cluster (it may be out of touch) -3. Alive indicates whether the node survives. -4. Replayed Journal Id represents the maximum metadata log ID that the node has currently replayed. -5. LastHeartbeat is the latest heartbeat. -6. IsHelper indicates whether the node is a helper node in bdbje. -7. ErrMsg is used to display error messages when the heartbeat fails. - -## keyword -SHOW, FRONTENDS diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md deleted file mode 100644 index 0aae646571..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -{ - "title": "SHOW FULL COLUMNS", - "language": "en" -} ---- - - - -# SHOW FULL COLUMNS -## description - This statement is used to view some information about columns of a table. - - Syntax: - SHOW FULL COLUMNS FROM tbl; - -## example - - 1. View the column information of specified table - - SHOW FULL COLUMNS FROM tbl; - -## keyword - - SHOW,FULL,COLUMNS diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW INDEX.md b/docs/en/sql-reference/sql-statements/Administration/SHOW INDEX.md deleted file mode 100644 index 80a4b00a6b..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW INDEX.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -{ - "title": "SHOW INDEX", - "language": "en" -} ---- - - - -# SHOW INDEX - -## description - - This statement is used to show all index(only bitmap index in current version) of a table - Grammar: - SHOW INDEX[ES] FROM [db_name.]table_name [FROM database]; - - OR - - SHOW KEY[S] FROM [db_name.]table_name [FROM database]; - -## example - - 1. 
dispaly all indexes in table table_name - SHOW INDEX FROM example_db.table_name; - -## keyword - - SHOW,INDEX diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md deleted file mode 100644 index d844d423ce..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -{ - "title": "SHOW MIGRATIONS", - "language": "en" -} ---- - - - -# SHOW MIGRATIONS -## Description - -This statement is used to view the progress of database migration - -grammar - -SHOW MIGRATIONS - -## keyword -SHOW,MIGRATIONS diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW PLUGINS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW PLUGINS.md deleted file mode 100644 index 02a4e13160..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW PLUGINS.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "SHOW PLUGINS", - "language": "en" -} ---- - - - -# SHOW PLUGINS -## description - - To view the installed plugins. - - Syntax - - SHOW PLUGINS; - - This command will show all builtin and custom plugins. - -## example - - 1. To view the installed plugins: - - SHOW PLUGINS; - -## keyword - SHOW, PLUGIN, PLUGINS \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md b/docs/en/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md deleted file mode 100644 index a396e1b43f..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -{ - "title": "SHOW TABLE STATUS", - "language": "en" -} ---- - - - -# SHOW TABLE STATUS - -## description - -This statement is used to view some information about Table. - - Syntax: - - SHOW TABLE STATUS - [FROM db] [LIKE "pattern"] - - Explain: - - 1. This statement is mainly used to be compatible with MySQL grammar. At present, only a small amount of information such as Comment is displayed. - -## Example - - 1. View the information of all tables under the current database - - SHOW TABLE STATUS; - - - 2. View the information of the table whose name contains example in the specified database - - SHOW TABLE STATUS FROM DB LIKE "% example%"; - -## Keyword - - SHOW,TABLE,STATUS \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW TRASH.md b/docs/en/sql-reference/sql-statements/Administration/SHOW TRASH.md deleted file mode 100644 index b1081d524f..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW TRASH.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -{ - "title": "SHOW TRASH", - "language": "en" -} ---- - - - -# SHOW TRASH -## description - -This statement is used to view trash used capacity on some backends. - - Syntax: - - SHOW TRASH [ON "BackendHost:BackendHeartBeatPort"]; - - Explain: - - 1. Backend The format is BackendHost:BackendHeartBeatPort of the node. - 2. TrashUsedCapacity Indicates that the trash data of the node occupies space. - -## example - - 1. View the space occupied by trash data of all be nodes. - - SHOW TRASH; - - 2. Check the space occupied by trash data of '192.168.0.1:9050'(The specific disk information will be displayed). 
- - SHOW TRASH ON "192.168.0.1:9050"; - -## keyword - SHOW, TRASH - diff --git a/docs/en/sql-reference/sql-statements/Administration/SHOW VIEW.md b/docs/en/sql-reference/sql-statements/Administration/SHOW VIEW.md deleted file mode 100644 index 976dfc29f4..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/SHOW VIEW.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -{ - "title": "SHOW TABLE STATUS", - "language": "en" -} ---- - - - -# SHOW VIEW - -## description - - This statement is used to show all views based on a given table - - Syntax: - - SHOW VIEW { FROM | IN } table [ FROM db ] - -## Example - - 1. Show all views based on the table testTbl - - show view from testTbl; - - -## Keyword - - SHOW,VIEW \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md b/docs/en/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md deleted file mode 100644 index 8be8b97f43..0000000000 --- a/docs/en/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -{ - "title": "UNINSTALL PLUGIN", - "language": "en" -} ---- - - - -# UNINSTALL PLUGIN -## description - - To uninstall a plugin. - - Syntax - - UNINSTALL PLUGIN plugin_name; - - plugin_name can be found by `SHOW PLUGINS;`. - - Can only uninstall non-builtin plugins. - -## example - - 1. Uninstall a plugin: - - UNINSTALL PLUGIN auditdemo; - -## keyword - UNINSTALL,PLUGIN \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md b/docs/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md deleted file mode 100644 index 6d6c2dd28f..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -{ - "title": "ALTER DATABASE", - "language": "en" -} ---- - - - -# ALTER DATABASE -## description -This statement is used to set the properties of the specified database. (Administrators only) -Grammar: -1) Setting database data quota in B/K/KB/M/MB/G/GB/T/TB/P/PB -OTHER DATABASE dbu name SET DATA QUOTA quota; - -2) Rename the database -ALTER DATABASE db_name RENAME new_db_name; - -Explain: -After renaming the database, use REVOKE and GRANT commands to modify the corresponding user rights if necessary. -The database's default data quota is 1024GB, and the default replica quota is 1073741824. - -## example -1. Setting the specified database data quota -ALTER DATABASE example_db SET DATA QUOTA 10995116277760; -The above units are bytes, equivalent to -ALTER DATABASE example_db SET DATA QUOTA 10T; - -ALTER DATABASE example_db SET DATA QUOTA 100G; - -ALTER DATABASE example_db SET DATA QUOTA 200M; - -2. Rename the database example_db to example_db2 -ALTER DATABASE example_db RENAME example_db2; - -## keyword -ALTER,DATABASE,RENAME - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md b/docs/en/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md deleted file mode 100644 index 1d1361cfa4..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -{ -"title": "ALTER RESOURCE", -"language": "en" -} ---- - - - -# ALTER RESOURCE - -## Description - - This statement is used to modify an existing resource. Only the root or admin user can modify resources. - Syntax: - ALTER RESOURCE 'resource_name' - PROPERTIES ("key"="value", ...); - - Note: The resource type does not support modification. - -## Example - - 1. 
Modify the working directory of the Spark resource named spark0: - ALTER RESOURCE 'spark0' PROPERTIES ("working_dir" = "hdfs://127.0.0.1:10000/tmp/doris_new"); - - 2. Modify the maximum number of connections to the S3 resource named remote_s3: - ALTER RESOURCE 'remote_s3' PROPERTIES ("s3_max_connections" = "100"); - -## keyword - - ALTER, RESOURCE \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Data Definition/ALTER TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/ALTER TABLE.md deleted file mode 100644 index 0d4a1f6f04..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/ALTER TABLE.md +++ /dev/null @@ -1,445 +0,0 @@ ---- -{ - "title": "ALTER TABLE", - "language": "en" -} ---- - - - -# ALTER TABLE - -## description - - This statement is used to modify an existing table. If no rollup index is specified, the base operation is the default. - The statement is divided into three types of operations: schema change, rollup, partition - These three types of operations cannot appear in an ALTER TABLE statement at the same time. - Where schema change and rollup are asynchronous operations and are returned if the task commits successfully. You can then use the SHOW ALTER command to view the progress. - Partition is a synchronous operation, and a command return indicates that execution is complete. - - grammar: - ALTER TABLE [database.]table - Alter_clause1[, alter_clause2, ...]; - - The alter_clause is divided into partition, rollup, schema change, rename and bimmap index. - - Partition supports the following modifications - Increase the partition - grammar: - ADD PARTITION [IF NOT EXISTS] partition_name - Partition_desc ["key"="value"] - [DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]] - note: - 1) partition_desc supports two ways of writing: - * VALUES LESS THAN [MAXVALUE|("value1", ...)] - * VALUES [("value1", ...), ("value1", ...)) - 1) The partition is the left closed right open interval. If the user only specifies the right boundary, the system will automatically determine the left boundary. - 2) If the bucket mode is not specified, the bucket method used by the built-in table is automatically used. - 3) If the bucket mode is specified, only the bucket number can be modified, and the bucket mode or bucket column cannot be modified. - 4) ["key"="value"] section can set some properties of the partition, see CREATE TABLE for details. - 5) Adding partitions to non-partitioned table is not supported. - - 2. Delete the partition - grammar: - DROP PARTITION [IF EXISTS] partition_name - note: - 1) Use a partitioned table to keep at least one partition. - 2) Execute DROP PARTITION For a period of time, the deleted partition can be recovered by the RECOVER statement. See the RECOVER statement for details. - 3) If DROP PARTITION FORCE is executed, the system will not check whether the partition has unfinished transactions, the partition will be deleted directly and cannot be recovered, generally this operation is not recommended - - 3. Modify the partition properties - grammar: - MODIFY PARTITION p1|(p1[, p2, ...]) SET ("key" = "value", ...) - Description: - 1) The following attributes of the modified partition are currently supported. - - storage_medium - - storage_cooldown_time - - remote_storage_cooldown_time - - replication_num - — in_memory - 2) For single-partition tables, partition_name is the same as the table name. - - Rollup supports the following ways to create: - 1. 
Create a rollup index - grammar: - ADD ROLLUP rollup_name (column_name1, column_name2, ...) - [FROM from_index_name] - [PROPERTIES ("key"="value", ...)] - - properties: Support setting timeout time, the default timeout time is 1 day. - example: - ADD ROLLUP r1(col1,col2) from r0 - 1.2 Batch create rollup index - grammar: - ADD ROLLUP [rollup_name (column_name1, column_name2, ...) - [FROM from_index_name] - [PROPERTIES ("key"="value", ...)],...] - example: - ADD ROLLUP r1(col1,col2) from r0, r2(col3,col4) from r0 - 1.3 note: - 1) If from_index_name is not specified, it is created by default from base index - 2) The columns in the rollup table must be existing columns in from_index - 3) In properties, you can specify the storage format. See CREATE TABLE for details. - - 2. Delete the rollup index - grammar: - DROP ROLLUP rollup_name - [PROPERTIES ("key"="value", ...)] - example: - DROP ROLLUP r1 - 2.1 Batch Delete rollup index - grammar: DROP ROLLUP [rollup_name [PROPERTIES ("key"="value", ...)],...] - example: DROP ROLLUP r1,r2 - 2.2 note: - 1) Cannot delete base index - - - Schema change supports the following modifications: - 1. Add a column to the specified location of the specified index - grammar: - ADD COLUMN column_name column_type [KEY | agg_type] [DEFAULT "default_value"] - [AFTER column_name|FIRST] - [TO rollup_index_name] - [PROPERTIES ("key"="value", ...)] - note: - 1) Aggregate model If you add a value column, you need to specify agg_type - 2) Non-aggregate models (such as DUPLICATE KEY) If you add a key column, you need to specify the KEY keyword. - 3) You cannot add a column that already exists in the base index to the rollup index - Recreate a rollup index if needed - - 2. Add multiple columns to the specified index - grammar: - ADD COLUMN (column_name1 column_type [KEY | agg_type] DEFAULT "default_value", ...) - [TO rollup_index_name] - [PROPERTIES ("key"="value", ...)] - note: - 1) Aggregate model If you add a value column, you need to specify agg_type - 2) Non-aggregate model If you add a key column, you need to specify the KEY keyword. - 3) You cannot add a column that already exists in the base index to the rollup index - (You can recreate a rollup index if needed) - - 3. Remove a column from the specified index - grammar: - DROP COLUMN column_name - [FROM rollup_index_name] - note: - 1) Cannot delete partition column - 2) If the column is removed from the base index, it will also be deleted if the column is included in the rollup index - - 4. Modify the column type and column position of the specified index - grammar: - MODIFY COLUMN column_name column_type [KEY | agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] - [AFTER column_name|FIRST] - [FROM rollup_index_name] - [PROPERTIES ("key"="value", ...)] - note: - 1) Aggregate model If you modify the value column, you need to specify agg_type - 2) Non-aggregate type If you modify the key column, you need to specify the KEY keyword. - 3) Only the type of the column can be modified. 
The other attributes of the column remain as they are (ie other attributes need to be explicitly written in the statement according to the original attribute, see example 8) - 4) The partition column cannot be modified - 5) The following types of conversions are currently supported (accuracy loss is guaranteed by the user) - TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE convert to a wider range of numeric types - TINTINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE/DECIMAL is converted to VARCHAR - VARCHAR supports modification of maximum length - Convert VARCHAR/CHAR to TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE. - Convert VARCHAR/CHAR to DATE (currently support six formats: "%Y-%m-%d", "%y-%m-%d", "%Y%m%d", "%y%m%d", "%Y/%m/%d, "%y/%m/%d") - Convert DATETIME to DATE(Only year-month-day information is retained, For example: `2019-12-09 21:47:05` <--> `2019-12-09`) - Convert DATE to DATETIME(Set hour, minute, second to zero, For example: `2019-12-09` <--> `2019-12-09 00:00:00`) - Convert FLOAT to DOUBLE - Convert INT to DATE (If the INT data fails to convert, the original data remains the same) - 6) Does not support changing from NULL to NOT NULL - - 5. Reorder the columns of the specified index - grammar: - ORDER BY (column_name1, column_name2, ...) - [FROM rollup_index_name] - [PROPERTIES ("key"="value", ...)] - note: - 1) All columns in index must be written - 2) value is listed after the key column - - 6. Modify the properties of the table, currently supports modifying the bloom filter column, the colocate_with attribute and the dynamic_partition attribute, the replication_num and default.replication_num. - grammar: - PROPERTIES ("key"="value") - note: - Can also be merged into the above schema change operation to modify, see the example below - - 7. Enable batch delete support - grammar: - ENABLE FEATURE "BATCH_DELETE" - note: - 1) Only support unique tables - 2) Batch deletion is supported for old tables, while new tables are already supported when they are created - - 8. Enable the ability to import in order by the value of the sequence column - grammer: - ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date") - note: - 1) Only support unique tables - 2) The sequence_type is used to specify the type of the sequence column, which can be integral and time type - 3) Only the orderliness of newly imported data is supported. Historical data cannot be changed -   - 9. Modify default buckets number of partition - grammer: - MODIFY DISTRIBUTION DISTRIBUTED BY HASH (k1[,k2 ...]) BUCKETS num - note: - 1)Only support non colocate table with RANGE partition and HASH distribution - - 10. Modify table comment - grammer: - MODIFY COMMENT "new table comment" - - 11. Modify column comment - grammer: - MODIFY COLUMN col1 COMMENT "new column comment" - - 12. Modify engine type - - Only the MySQL type can be changed to the ODBC type. The value of driver is the name of the driver in the odbc.init configuration. - - grammar: - MODIFY ENGINE TO odbc PROPERTIES("driver" = "MySQL"); -   - Rename supports modification of the following names: - 1. Modify the table name - grammar: - RENAME new_table_name; - - 2. Modify the rollup index name - grammar: - RENAME ROLLUP old_rollup_name new_rollup_name; - - 3. Modify the partition name - grammar: - RENAME PARTITION old_partition_name new_partition_name; - - Replace supports swap data between two tables: - 1. swap data between two tables - geammar: - REPLACE WITH TABLE new_table [PROPERTIES('swap' = 'true')]; - note: - 1. 
if swap is true, swap data between two tables. - 2. if swap is fasle, replace the old_table with the new_table, and delete the new_table. - - Bitmap index supports the following modifications: - 1. create bitmap index - grammar: - ADD INDEX [IF NOT EXISTS] index_name (column [, ...],) [USING BITMAP] [COMMENT 'balabala']; - note: - 1. only supports bitmap index for current version - 2. BITMAP index only supports apply on single column - 2. drop index - grammar: - DROP INDEX [IF EXISTS] index_name; - -## example - - [table] - 1. Modify the default number of replications of the table, which is used as default number of replications while creating new partition. - ALTER TABLE example_db.my_table - SET ("default.replication_num" = "2"); - - 2. Modify the actual number of replications of a unpartitioned table (unpartitioned table only) - ALTER TABLE example_db.my_table - SET ("replication_num" = "3"); - - [partition] - 1. Add partition, existing partition [MIN, 2013-01-01), add partition [2013-01-01, 2014-01-01), use default bucket mode - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES LESS THAN ("2014-01-01"); - - 2. Increase the partition and use the new number of buckets - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES LESS THAN ("2015-01-01") - DISTRIBUTED BY HASH(k1) BUCKETS 20; - - 3. Increase the partition and use the new number of copies - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES LESS THAN ("2015-01-01") - ("replication_num"="1"); - - 4. Modify the number of partition copies - ALTER TABLE example_db.my_table - MODIFY PARTITION p1 SET("replication_num"="1"); - - 5. Batch modify the specified partitions - ALTER TABLE example_db.my_table - MODIFY PARTITION (p1, p2, p4) SET("in_memory"="true"); - - 6. Batch modify all partitions - ALTER TABLE example_db.my_table - MODIFY PARTITION (*) SET("storage_medium"="HDD"); - - 7. Delete the partition - ALTER TABLE example_db.my_table - DROP PARTITION p1; - - 8. Add a partition that specifies the upper and lower bounds - - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES [("2014-01-01"), ("2014-02-01")); - - [rollup] - 1. Create index: example_rollup_index, based on base index(k1,k2,k3,v1,v2). Columnar storage. - ALTER TABLE example_db.my_table - ADD ROLLUP example_rollup_index(k1, k3, v1, v2); - - 2. Create index: example_rollup_index2, based on example_rollup_index(k1,k3,v1,v2) - ALTER TABLE example_db.my_table - ADD ROLLUP example_rollup_index2 (k1, v1) - FROM example_rollup_index; - - 3. Create index: example_rollup_index3, based on base index (k1, k2, k3, v1), custom rollup timeout time is one hour. - - ALTER TABLE example_db.my_table - ADD ROLLUP example_rollup_index(k1, k3, v1) - PROPERTIES("timeout" = "3600"); - - 3. Delete index: example_rollup_index2 - ALTER TABLE example_db.my_table - DROP ROLLUP example_rollup_index2; - - [schema change] - 1. Add a key column new_col to the col1 of example_rollup_index (non-aggregate model) - ALTER TABLE example_db.my_table - ADD COLUMN new_col INT KEY DEFAULT "0" AFTER col1 - TO example_rollup_index; - - 2. Add a value column new_col to the col1 of example_rollup_index (non-aggregate model) -   ALTER TABLE example_db.my_table -   ADD COLUMN new_col INT DEFAULT "0" AFTER col1 -   TO example_rollup_index; - - 3. Add a key column new_col (aggregation model) to col1 of example_rollup_index -   ALTER TABLE example_db.my_table -   ADD COLUMN new_col INT DEFAULT "0" AFTER col1 -   TO example_rollup_index; - - 4. Add a value column to the col1 of example_rollup_index. 
new_col SUM aggregation type (aggregation model) -   ALTER TABLE example_db.my_table -   ADD COLUMN new_col INT SUM DEFAULT "0" AFTER col1 -   TO example_rollup_index; - - 5. Add multiple columns to the example_rollup_index (aggregate model) - ALTER TABLE example_db.my_table - ADD COLUMN (col1 INT DEFAULT "1", col2 FLOAT SUM DEFAULT "2.3") - TO example_rollup_index; - - 6. Remove a column from example_rollup_index - ALTER TABLE example_db.my_table - DROP COLUMN col2 - FROM example_rollup_index; - - 7. Modify the base index's col1 key column to be of type BIGINT and move to the col2 column - (*Attention: Whether to modify the key column or the value column, complete column information need to be declared. For example, MODIFY COLUMN xxx COLUMNTYPE [KEY|agg_type]*) - ALTER TABLE example_db.my_table - MODIFY COLUMN col1 BIGINT KEY DEFAULT "1" AFTER col2; - - 8. Modify the maximum length of the val1 column of the base index. The original val1 is (val1 VARCHAR(32) REPLACE DEFAULT "abc") - ALTER TABLE example_db.my_table - MODIFY COLUMN val1 VARCHAR(64) REPLACE DEFAULT "abc"; - - 9. Reorder the columns in example_rollup_index (set the original column order: k1, k2, k3, v1, v2) - ALTER TABLE example_db.my_table - ORDER BY (k3, k1, k2, v2, v1) - FROM example_rollup_index; - - 10. Perform both operations simultaneously - ALTER TABLE example_db.my_table - ADD COLUMN v2 INT MAX DEFAULT "0" AFTER k2 TO example_rollup_index, - ORDER BY (k3,k1,k2,v2,v1) FROM example_rollup_index; - - 11. Modify the bloom filter column of the table - ALTER TABLE example_db.my_table SET ("bloom_filter_columns"="k1,k2,k3"); - - Can also be merged into the above schema change operation (note that the syntax of multiple clauses is slightly different) - ALTER TABLE example_db.my_table - DROP COLUMN col2 - PROPERTIES ("bloom_filter_columns"="k1,k2,k3"); - - 12. Modify the Colocate property of the table - - ALTER TABLE example_db.my_table set ("colocate_with" = "t1"); - - 13. Change the bucketing mode of the table from Hash Distribution to Random Distribution - - ALTER TABLE example_db.my_table set ("distribution_type" = "random"); - - 14. Modify the dynamic partition properties of the table (support adding dynamic partition properties to tables without dynamic partition properties) - - ALTER TABLE example_db.my_table set ("dynamic_partition.enable" = "false"); - - If you need to add dynamic partition attributes to a table without dynamic partition attributes, you need to specify all dynamic partition attributes. - (Note:Adding dynamic partition attributes to non-partitioned table is not supported) - - ALTER TABLE example_db.my_table set ("dynamic_partition.enable"= "true", "dynamic_partition.time_unit" = "DAY", "dynamic_partition.end "= "3", "dynamic_partition.prefix" = "p", "dynamic_partition.buckets" = "32"); - - - 15. Modify the in_memory property of the table - - ALTER TABLE example_db.my_table set ("in_memory" = "true"); - 16. Enable batch delete support - - ALTER TABLE example_db.my_table ENABLE FEATURE "BATCH_DELETE" - 17. Enable the ability to import in order by the value of the Sequence column - - ALTER TABLE example_db.my_table ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date") - - 18. Modify the default buckets number of example_db.my_table to 50 - - ALTER TABLE example_db.my_table MODIFY DISTRIBUTION DISTRIBUTED BY HASH(k1) BUCKETS 50; - - 19. Modify table comment - - ALTER TABLE example_db.my_table MODIFY COMMENT "new comment"; - - 20. 
Modify column comment - - ALTER TABLE example_db.my_table MODIFY COLUMN k1 COMMENT "k1", MODIFY COLUMN k2 COMMENT "k2"; - - 21. Modify engine Type - - ALTER TABLE example_db.mysql_table MODIFY ENGINE TO odbc PROPERTIES("driver" = "MySQL"); - - [rename] - 1. Modify the table named table1 to table2 - ALTER TABLE table1 RENAME table2; - - 2. Modify the rollup index named rollup1 in the table example_table to rollup2 - ALTER TABLE example_table RENAME ROLLUP rollup1 rollup2; - - 3. Modify the partition named p1 in the table example_table to p2 - ALTER TABLE example_table RENAME PARTITION p1 p2; - - [replace] - 1. swap data between two tables - ALTER TABLE table1 REPLACE WITH TABLE table2; - 2. replace the table1 with the table2, and delete the table2. - ALTER TABLE table1 REPLACE WITH TABLE table2 PROPERTIES('swap' = 'false'); - - [index] - 1. create index on table1 column siteid using bitmap - ALTER TABLE table1 ADD INDEX [IF NOT EXISTS] index_name [USING BITMAP] (siteid) COMMENT 'balabala'; - 2. drop bitmap index of table1 - ALTER TABLE table1 DROP INDEX [IF EXISTS] index_name; - -## keyword - - ALTER, TABLE, ROLLUP, COLUMN, PARTITION, RENAME diff --git a/docs/en/sql-reference/sql-statements/Data Definition/ALTER VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/ALTER VIEW.md deleted file mode 100644 index 03ff25f402..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/ALTER VIEW.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -{ - "title": "ALTER VIEW", - "language": "en" -} ---- - - - -# ALTER VIEW -## description - This statement is used to modify the definition of a view - Syntax: - ALTER VIEW - [db_name.]view_name - (column1[ COMMENT "col comment"][, column2, ...]) - AS query_stmt - - Explain: - 1. View is logical, it isn't stored in the physical medium. When we querying, view will be embed as subqueries in query statement. Therefore, modifying the definition of views is equivalent to modifying query_stmt which is defined in view. - 2. query_stmt is arbitrarily supported SQL. - -## example - - 1. Modify example_view on the example_db - - ALTER VIEW example_db.example_view - ( - c1 COMMENT "column 1", - c2 COMMENT "column 2", - c3 COMMENT "column 3" - ) - AS SELECT k1, k2, SUM(v1) FROM example_table - GROUP BY k1, k2 \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Data Definition/BACKUP.md b/docs/en/sql-reference/sql-statements/Data Definition/BACKUP.md deleted file mode 100644 index 6c1c7c317e..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/BACKUP.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -{ - "title": "BACKUP", - "language": "en" -} ---- - - - -# BACKUP -## Description -This statement is used to backup data under the specified database. This command is an asynchronous operation. After successful submission, you need to check progress through the SHOW BACKUP command. Only tables of OLAP type are backed up. -Grammar: -BACKUP SNAPSHOT [db_name].{snapshot_name} -TO `repository_name` -[ON|EXCLUDE] ( -`Table_name` [partition (`P1',...)], -... -) -PROPERTIES ("key"="value", ...); - -Explain: -1. Only one BACKUP or RESTORE task can be performed under the same database. -2. The ON clause identifies the tables and partitions that need to be backed up. If no partition is specified, all partitions of the table are backed up by default. -3. The EXCLUDE clause identifies the tables and partitions that need not to be backed up. 
All partitions of all tables in the database except the specified tables or partitions will be backed up. -4. PROPERTIES currently supports the following attributes: -"type" = "full": indicates a full backup (the default). -"timeout" = "3600": task timeout, in seconds. The default is one day. - -## example - -1. Back up the table example_tbl under example_db in full to the warehouse example_repo: -BACKUP SNAPSHOT example_db.snapshot_label1 -TO example_repo -ON (example_tbl) -PROPERTIES ("type" = "full"); - -2. Fully back up the p1 and p2 partitions of table example_tbl, and table example_tbl2, under example_db to the warehouse example_repo: -BACKUP SNAPSHOT example_db.snapshot_label2 -TO example_repo -ON -( -example_tbl PARTITION (p1,p2), -example_tbl2 -); - -3. Back up all tables in example_db except example_tbl to the warehouse example_repo: -BACKUP SNAPSHOT example_db.snapshot_label3 -TO example_repo -EXCLUDE (example_tbl); - -## keyword -BACKUP - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md b/docs/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md deleted file mode 100644 index bb9339be38..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -{ - "title": "CANCEL ALTER", - "language": "en" -} ---- - - - -# CANCEL ALTER -## Description -This statement is used to undo an ALTER operation. -1. Cancel an ALTER TABLE COLUMN operation -Grammar: -CANCEL ALTER TABLE COLUMN -FROM db_name.table_name - -2. Cancel an ALTER TABLE ROLLUP operation -Grammar: -CANCEL ALTER TABLE ROLLUP -FROM db_name.table_name - -3. Batch cancel rollup jobs by job id - Grammar: - CANCEL ALTER TABLE ROLLUP - FROM db_name.table_name (jobid,...) - Note: - Batch cancelling rollup jobs is an async operation; use `show alter table rollup` to see whether it executed successfully - -4. OTHER CLUSTER -Grammar: -(To be implemented...) - - -## example -[CANCEL ALTER TABLE COLUMN] -1. Cancel the ALTER COLUMN operation for my_table. -CANCEL ALTER TABLE COLUMN -FROM example_db.my_table; - -[CANCEL ALTER TABLE ROLLUP] -1. Cancel the ADD ROLLUP operation for my_table. -CANCEL ALTER TABLE ROLLUP -FROM example_db.my_table; - -[CANCEL ALTER TABLE ROLLUP] -1. Cancel rollup alter jobs by job id -CANCEL ALTER TABLE ROLLUP -FROM example_db.my_table (12801,12802); - -## keyword -CANCEL,ALTER,TABLE,COLUMN,ROLLUP - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md b/docs/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md deleted file mode 100644 index a0e379ad17..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "CANCEL BACKUP", - "language": "en" -} ---- - - - -# CANCEL BACKUP -## Description -This statement is used to cancel an ongoing BACKUP task. -Grammar: -CANCEL BACKUP FROM db_name; - -## example -1. Cancel the BACKUP task under example_db. -CANCEL BACKUP FROM example_db; - -## keyword -CANCEL, BACKUP - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md b/docs/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md deleted file mode 100644 index b4bf1bb569..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -{ - "title": "CANCEL RESTORE", - "language": "en" -} ---- - - - -# CANCEL RESTORE -## Description -This statement is used to cancel an ongoing RESTORE task.
-Grammar: -CANCEL RESTORE FROM db_name; - -Be careful: -When the recovery is abolished around the COMMIT or later stage, the restored tables may be inaccessible. At this point, data recovery can only be done by performing the recovery operation again. - -## example -1. Cancel the RESTORE task under example_db. -CANCEL RESTORE FROM example_db; - -## keyword -CANCEL, RESTORE - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md deleted file mode 100644 index 7dadd2ef6e..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -{ - "title": "CREATE DATABASE", - "language": "en" -} ---- - - - -# CREATE DATABASE - -## Description - - This statement is used to create a new database - Syntax: - CREATE DATABASE [IF NOT EXISTS] db_name - [PROPERTIES ("key"="value", ...)] ; - -1. PROPERTIES - Additional information of a database, can be defaulted. - 1) In case of iceberg, the following information needs to be provided in the properties. - ``` - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ) - - ``` - `iceberg.database` is the name of the database corresponding to Iceberg. - `iceberg.hive.metastore.uris` is the address of the hive metastore service. - `iceberg.catalog.type` defaults to `HIVE_CATALOG`. Currently, only `HIVE_CATALOG` is supported, more Iceberg catalog types will be supported later. - -## example - 1. Create a new database db_test - ``` - CREATE DATABASE db_test; - ``` - - 2. Create a new Iceberg database iceberg_test - ``` - CREATE DATABASE `iceberg_test` - PROPERTIES ( - "iceberg.database" = "doris", - "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - -## keyword -CREATE,DATABASE - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md deleted file mode 100644 index 77928cb394..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -{ - "title": "CREATE ENCRYPTKEY", - "language": "en" -} ---- - - - -# CREATE ENCRYPTKEY - -## Description - -### Syntax - -``` -CREATE ENCRYPTKEY key_name - AS "key_string" -``` - -### Parameters - -> `key_name`: The name of the key to be created, which can include the name of the database. For example: `db1.my_key`. -> -> `key_string`: The string to create the key - -This statement creates a custom key. Executing this command requires the user to have the `ADMIN` privileges. - -If the database name is included in `key_name`, then this custom key will be created in the corresponding database, otherwise this function will be created in the database where the current session is located. The name of the new key cannot be the same as the key that already exists in the corresponding database, otherwise the creation will fail. - -## Example - -1. Create a custom key - - ``` - CREATE ENCRYPTKEY my_key as "ABCD123456789"; - ``` - -2. Using a custom key - - To use a custom key, add the keyword `KEY`/`key` before the key name, separated from `key_name` by a space. 
- - ``` - mysql> SELECT HEX(AES_ENCRYPT("Doris is Great", KEY my_key)); - +------------------------------------------------+ - | hex(aes_encrypt('Doris is Great', key my_key)) | - +------------------------------------------------+ - | D26DB38579D6A343350EDDC6F2AD47C6 | - +------------------------------------------------+ - 1 row in set (0.02 sec) - - mysql> SELECT AES_DECRYPT(UNHEX('D26DB38579D6A343350EDDC6F2AD47C6'), KEY my_key); - +--------------------------------------------------------------------+ - | aes_decrypt(unhex('D26DB38579D6A343350EDDC6F2AD47C6'), key my_key) | - +--------------------------------------------------------------------+ - | Doris is Great | - +--------------------------------------------------------------------+ - 1 row in set (0.01 sec) - ``` - -## Keyword - - CREATE,ENCRYPTKEY diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE INDEX.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE INDEX.md deleted file mode 100644 index df128fda18..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE INDEX.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "CREATE INDEX", - "language": "en" -} ---- - - - -# CREATE INDEX - -## description - - This statement is used to create index - grammer: - CREATE INDEX [IF NOT EXISTS] index_name ON table_name (column [, ...],) [USING BITMAP] [COMMENT'balabala']; - note: - 1. only support bitmap index in current version - 2. BITMAP index only supports apply to single column - -## example - - 1. create index on table1 column siteid using bitmap - CREATE INDEX [IF NOT EXISTS] index_name ON table1 (siteid) USING BITMAP COMMENT 'balabala'; - -## keyword - - CREATE,INDEX diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md deleted file mode 100644 index 4c0afa2c65..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md +++ /dev/null @@ -1,238 +0,0 @@ ---- -{ - "title": "CREATE MATERIALIZED VIEW", - "language": "en" -} ---- - - - -# CREATE MATERIALIZED VIEW - -## description - -This statement is used to create a materialized view. - - Asynchronous syntax. After the call is successful, it only indicates that the task to create the materialized view is successfully submitted. The user needs to check the progress of the materialized view by using ```show alter table rollup```. - - After the progress is FINISHED, you can use the ```desc [table_name] all``` command to view the schema of the materialized view. - -syntax: - - ``` - - CREATE MATERIALIZED VIEW [MG name] as [query] - [PROPERTIES ("key" = "value")] - - ``` - -1. MV name - - Name of the materialized view. Required. - - Materialized view names in the same table cannot be duplicated. - -2. query - - The query used to construct the materialized view. The result of the query is the data of the materialized view. The query format currently supported is: - - ``` -    SELECT select_expr [, select_expr ...] -    FROM [Base view name] -    GROUP BY column_name [, column_name ...] -    ORDER BY column_name [, column_name ...] -     -    The syntax is the same as the query syntax. - ``` - - select_expr: All columns in the materialized view's schema. - - + Only single columns and aggregate columns without expression calculation are supported. 
- + The aggregate function currently only supports SUM, MIN, MAX, and the parameters of the aggregate function can only be a single column without expression calculation. - + Contains at least one single column. - + All involved columns can only appear once. - - base view name: The original table name of the materialized view. Required. - - + Must be a single table and not a subquery - - group by: Grouped column of materialized view, optional. - - + If not filled, the data will not be grouped. - - order by: Sort order of materialized view, optional. - - + The order of the column sort must be the same as the column declaration order in select_expr. - + If order by is not specified, sort columns are automatically supplemented according to the rules. - - + If the materialized view is an aggregate type, all grouping columns are automatically supplemented with sort columns. - + If the materialized view is a non-aggregated type, the first 36 bytes are automatically supplemented as a sorted column. If the number of sorts for automatic replenishment is less than three, the first three are sorted. - + If the query contains a grouping column, the sort order must be the same as the grouping column. - -3. properties - - Declare some configuration of materialized view, optional. - - ``` - PROPERTIES ("key" = "value", "key" = "value" ...) - - ``` - - The following configurations can be declared here: - - + short_key: the number of columns. - + timeout: timeout for materialized view construction. - -## example - -Base table structure is - -``` -mysql> desc duplicate_table; -+-------+--------+------+------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+-------+--------+------+------+---------+-------+ -| k1 | INT | Yes | true | N/A | | -| k2 | INT | Yes | true | N/A | | -| k3 | BIGINT | Yes | true | N/A | | -| k4 | BIGINT | Yes | true | N/A | | -+-------+--------+------+------+---------+-------+ -``` - -1. Create a materialized view containing only the columns of the original table (k1, k2) - - ``` - create materialized view k1_k2 as -select k1, k2 from duplicate_table; - ``` - - The materialized view's schema is shown below. The materialized view contains only two columns k1, k2 without any aggregation. - - ``` - +-----------------+-------+--------+------+------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +-----------------+-------+--------+------+------+---------+-------+ - | k1_k2 | k1 | INT | Yes | true | N/A | | - | | k2 | INT | Yes | true | N/A | | - +-----------------+-------+--------+------+------+---------+-------+ - ``` - -2. Create a materialized view sorted by k2 - - ``` - create materialized view k2_order as -select k2, k1 from duplicate_table order by k2; -``` - - The materialized view's schema is shown below. The materialized view contains only two columns k2, k1, where column k2 is a sorted column without any aggregation. - - ``` - +-----------------+-------+--------+------+-------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +-----------------+-------+--------+------+-------+---------+-------+ - | k2_order | k2 | INT | Yes | true | N/A | | - | | k1 | INT | Yes | false | N/A | NONE | - +-----------------+-------+--------+------+-------+---------+-------+ - ``` - -3. 
Create a materialized view grouped by k1, k2 with k3 as the SUM aggregate - - ``` - create materialized view k1_k2_sumk3 as -select k1, k2, sum (k3) from duplicate_table group by k1, k2; - ``` - - The materialized view's schema is shown below. The materialized view contains two columns k1, k2 and sum (k3), where k1, k2 are grouped columns, and sum (k3) is the sum of the k3 columns grouped according to k1, k2. - - Because the materialized view does not declare a sort column, and the materialized view has aggregate data, the system supplements the grouping columns k1 and k2 by default. - - ``` - +-----------------+-------+--------+------+-------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +-----------------+-------+--------+------+-------+---------+-------+ - | k1_k2_sumk3 | k1 | INT | Yes | true | N/A | | - | | k2 | INT | Yes | true | N/A | | - | | k3 | BIGINT | Yes | false | N/A | SUM | - +-----------------+-------+--------+------+-------+---------+-------+ - ``` - -4. Create a materialized view to remove duplicate rows - - ``` - create materialized view deduplicate as -select k1, k2, k3, k4 from duplicate_table group by k1, k2, k3, k4; - ``` - - The materialized view schema is shown below. The materialized view contains k1, k2, k3, and k4 columns, and there are no duplicate rows. - - ``` - +-----------------+-------+--------+------+-------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +-----------------+-------+--------+------+-------+---------+-------+ - | deduplicate | k1 | INT | Yes | true | N/A | | - | | k2 | INT | Yes | true | N/A | | - | | k3 | BIGINT | Yes | true | N/A | | - | | k4 | BIGINT | Yes | true | N/A | | - +-----------------+-------+--------+------+-------+---------+-------+ - ``` - -5. Create a non-aggregated materialized view that does not declare a sort column - - The schema of all_type_table is as follows: - - ``` - +-------+--------------+------+-------+---------+-------+ - | Field | Type | Null | Key | Default | Extra | - +-------+--------------+------+-------+---------+-------+ - | k1 | TINYINT | Yes | true | N/A | | - | k2 | SMALLINT | Yes | true | N/A | | - | k3 | INT | Yes | true | N/A | | - | k4 | BIGINT | Yes | true | N/A | | - | k5 | DECIMAL(9,0) | Yes | true | N/A | | - | k6 | DOUBLE | Yes | false | N/A | NONE | - | k7 | VARCHAR(20) | Yes | false | N/A | NONE | - +-------+--------------+------+-------+---------+-------+ - ``` - - The materialized view contains k3, k4, k5, k6, k7 columns, and no sort column is declared. The creation statement is as follows: - - ``` - create materialized view mv_1 as -select k3, k4, k5, k6, k7 from all_type_table; - ``` - - The system's default supplementary sort columns are k3, k4, and k5. The sum of the number of bytes for these three column types is 4 (INT) + 8 (BIGINT) + 16 (DECIMAL) = 28 <36. So these three columns are added as sort columns. - - The materialized view's schema is as follows. You can see that the key fields of the k3, k4, and k5 columns are true, which is the sort order. The key field of the k6, k7 columns is false, that is, non-sorted. 
- - ``` - +----------------+-------+--------------+------+-------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +----------------+-------+--------------+------+-------+---------+-------+ - | mv_1 | k3 | INT | Yes | true | N/A | | - | | k4 | BIGINT | Yes | true | N/A | | - | | k5 | DECIMAL(9,0) | Yes | true | N/A | | - | | k6 | DOUBLE | Yes | false | N/A | NONE | - | | k7 | VARCHAR(20) | Yes | false | N/A | NONE | - +----------------+-------+--------------+------+-------+---------+-------+ - ``` - -## keyword - CREATE, MATERIALIZED, VIEW diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md deleted file mode 100644 index 1bc33a4b6e..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -{ - "title": "CREATE REPOSITORY", - "language": "en" -} ---- - - - -# CREATE REPOSITORY -## Description - This statement is used to create the warehouse. The warehouse is used for backup or recovery. Only root or superuser users can create warehouses. - Grammar: - CREATE [READ ONLY] REPOSITORY `repo_name` - WITH [BROKER `broker_name`|S3] - ON LOCATION `repo_location` - PROPERTIES ("key"="value", ...); - - Explain: - 1. The creation of warehouses depends on existing brokers, or use aws s3 protocl to connet cloud storage directly. - 2. If it is a read-only warehouse, it can only be restored on the warehouse. If not, you can backup and restore operations. - 3. According to the different types of broker or S3, PROPERTIES is different, see the example. - -## example - 1. Create a warehouse named bos_repo, which relies on BOS broker "bos_broker", and the data root directory is: bos://palo_backup. - CREATE REPOSITORY `bos_repo` - WITH BROKER `bos_broker` - ON LOCATION "bos://palo_backup" - PROPERTIES - ( - "bos_endpoint" ="http://gz.bcebos.com", - "bos_accesskey" = "bos_accesskey", - "bos_secret_accesskey"="bos_accesskey" - ); - - 2. Create the same warehouse as in Example 1, but with read-only attributes: - CREATE READ ONLY REPOSITORY `bos_repo` - WITH BROKER `bos_broker` - ON LOCATION "bos://palo_backup" - PROPERTIES - ( - "bos_endpoint" ="http://gz.bcebos.com", - "bos_accesskey" = "bos_accesskey", - "bos_secret_accesskey"="bos_secret_accesskey" - ); - - 3. Create a warehouse named hdfs_repo, which relies on Baidu HDFS broker "hdfs_broker", and the data root directory is: hdfs://hadoop-name-node:54310/path/to/repo./ - CREATE REPOSITORY `hdfs_repo` - WITH BROKER `hdfs_broker` - ON LOCATION "hdfs://hadoop-name-node:54310/path/to/repo/" - PROPERTIES - ( - "Username" = "User" - "password" = "password" - ); - - 4. 创建名为 s3_repo 的仓库, 直接链接云存储, 而不通过broker. - CREATE REPOSITORY `s3_repo` - WITH S3 - ON LOCATION "s3://s3-repo" - PROPERTIES - ( - "AWS_ENDPOINT" = "http://s3-REGION.amazonaws.com", - "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY", - "AWS_SECRET_KEY"="AWS_SECRET_KEY", - "AWS_REGION" = "REGION" - ); - -## keyword -CREATE, REPOSITORY diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md deleted file mode 100644 index 177454e70f..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md +++ /dev/null @@ -1,134 +0,0 @@ ---- -{ - "title": "CREATE RESOURCE", - "language": "en" -} ---- - - - -# CREATE RESOURCE - -## Description - - This statement is used to create a resource. 
Only the root or admin user can create resources. Currently supports Spark, ODBC, S3 external resources. - In the future, other external resources may be added to Doris for use, such as Spark/GPU for query, HDFS/S3 for external storage, MapReduce for ETL, etc. - - Syntax: - CREATE [EXTERNAL] RESOURCE "resource_name" - PROPERTIES ("key"="value", ...); - - Explanation: - 1. The type of resource needs to be specified in PROPERTIES "type" = "[spark|odbc_catalog|s3]", currently supports spark, odbc_catalog, s3. - 2. The PROPERTIES varies according to the resource type, see the example for details. - -## Example - - 1. Create a Spark resource named spark0 in yarn cluster mode. - - ```` - CREATE EXTERNAL RESOURCE "spark0" - PROPERTIES - ( - "type" = "spark", - "spark.master" = "yarn", - "spark.submit.deployMode" = "cluster", - "spark.jars" = "xxx.jar,yyy.jar", - "spark.files" = "/tmp/aaa,/tmp/bbb", - "spark.executor.memory" = "1g", - "spark.yarn.queue" = "queue0", - "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999", - "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000", - "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris", - "broker" = "broker0", - "broker.username" = "user0", - "broker.password" = "password0" - ); - ```` - - Spark related parameters are as follows: - - spark.master: Required, currently supports yarn, spark://host:port. - - spark.submit.deployMode: The deployment mode of the Spark program, required, supports both cluster and client. - - spark.hadoop.yarn.resourcemanager.address: Required when master is yarn. - - spark.hadoop.fs.defaultFS: Required when master is yarn. - - Other parameters are optional, refer to http://spark.apache.org/docs/latest/configuration.html - - Working_dir and broker need to be specified when Spark is used for ETL. described as follows: - working_dir: The directory used by the ETL. Required when spark is used as an ETL resource. For example: hdfs://host:port/tmp/doris. - broker: broker name. Required when spark is used as an ETL resource. Configuration needs to be done in advance using the `ALTER SYSTEM ADD BROKER` command. - broker.property_key: The authentication information that the broker needs to specify when reading the intermediate file generated by ETL. - - 2. Create an ODBC resource - - ```` - CREATE EXTERNAL RESOURCE `oracle_odbc` - PROPERTIES ( - "type" = "odbc_catalog", - "host" = "192.168.0.1", - "port" = "8086", - "user" = "test", - "password" = "test", - "database" = "test", - "odbc_type" = "oracle", - "driver" = "Oracle 19 ODBC driver" - ); - ```` - - The relevant parameters of ODBC are as follows: - - hosts: IP address of the external database - - driver: The driver name of the ODBC appearance, which must be the same as the Driver name in be/conf/odbcinst.ini. - - odbc_type: the type of the external database, currently supports oracle, mysql, postgresql - - user: username of the foreign database - - password: the password information of the corresponding user - - 3. 
Create S3 resource - - ```` - CREATE RESOURCE "remote_s3" - PROPERTIES - ( - "type" = "s3", - "s3_endpoint" = "http://bj.s3.com", - "s3_region" = "bj", - "s3_root_path" = "/path/to/root", - "s3_access_key" = "bbb", - "s3_secret_key" = "aaaa", - "s3_max_connections" = "50", - "s3_request_timeout_ms" = "3000", - "s3_connection_timeout_ms" = "1000" - ); - ```` - - S3 related parameters are as follows: - - required - - s3_endpoint: s3 endpoint - - s3_region: s3 region - - s3_root_path: s3 root directory - - s3_access_key: s3 access key - - s3_secret_key: s3 secret key - - optional - - s3_max_connections: the maximum number of s3 connections, the default is 50 - - s3_request_timeout_ms: s3 request timeout, in milliseconds, the default is 3000 - - s3_connection_timeout_ms: s3 connection timeout, in milliseconds, the default is 1000 - - -## keyword - - CREATE, RESOURCE diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md deleted file mode 100644 index b0cb703451..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -{ - "title": "CREATE TABLE LIKE", - "language": "en" -} ---- - - - -# CREATE TABLE LIKE - -## description - -Use CREATE TABLE ... LIKE to create an empty table based on the definition of another table, including any column attributes, table partitions and table properties defined in the original table: -Syntax: - -``` - CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database.]table_name LIKE [database.]table_name [WITH ROLLUP (r2,r2,r3,...)] -``` - -Explain: - 1. The replicated table structures include Column Definition, Partitions, Table Properties, and so on - 2. The SELECT privilege is required on the original table. - 3. Support to copy external table such as MySQL. - 4. Support to copy OLAP table rollup - -## Example - 1. Under the test1 Database, create an empty table with the same table structure as table1, named table2 - - CREATE TABLE test1.table2 LIKE test1.table1 - - 2. Under the test2 Database, create an empty table with the same table structure as test1.table1, named table2 - - CREATE TABLE test2.table2 LIKE test1.table1 - - 3. Under the test1 Database, create an empty table with the same table structure as table1, named table2. copy r1 and r2 rollup of table1 simultaneously - - CREATE TABLE test1.table2 LIKE test1.table1 WITH ROLLUP (r1,r2) - - 4. Under the test1 Database, create an empty table with the same table structure as table1, named table2. copy all rollup of table1 simultaneously - - CREATE TABLE test1.table2 LIKE test1.table1 WITH ROLLUP - - 5. Under the test2 Database, create an empty table with the same table structure as table1, named table2. copy r1 and r2 rollup of table1 simultaneously - - CREATE TABLE test2.table2 LIKE test1.table1 WITH ROLLUP (r1,r2) - - 6. Under the test2 Database, create an empty table with the same table structure as table1, named table2. copy all rollup of table1 simultaneously - - CREATE TABLE test2.table2 LIKE test1.table1 WITH ROLLUP - - 7. 
Under the test1 Database, create an empty table with the same table structure as MySQL's external table1, called table2 - - CREATE TABLE test1.table2 LIKE test1.table1 - -## keyword - -``` - CREATE,TABLE,LIKE - -``` diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE.md deleted file mode 100644 index 88b843ddfe..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE TABLE.md +++ /dev/null @@ -1,879 +0,0 @@ ---- -{ - "title": "CREATE TABLE", - "language": "en" -} ---- - - - -# CREATE TABLE - -## description - -This statement is used to create table -Syntax: - -``` - CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database.]table_name - (column_definition1[, column_definition2, ...] - [, index_definition1[, ndex_definition12,]]) - [ENGINE = [olap|mysql|broker|hive|iceberg]] - [key_desc] - [COMMENT "table comment"] - [partition_desc] - [distribution_desc] - [rollup_index] - [PROPERTIES ("key"="value", ...)] - [BROKER PROPERTIES ("key"="value", ...)]; -``` - -1. column_definition - Syntax: - `col_name col_type [agg_type] [NULL | NOT NULL] [DEFAULT "default_value"]` - Explain: - col_name: Name of column - col_type: Type of column - ``` - BOOLEAN(1 Byte) - Range: {0,1} - TINYINT(1 Byte) - Range: -2^7 + 1 ~ 2^7 - 1 - SMALLINT(2 Bytes) - Range: -2^15 + 1 ~ 2^15 - 1 - INT(4 Bytes) - Range: -2^31 + 1 ~ 2^31 - 1 - BIGINT(8 Bytes) - Range: -2^63 + 1 ~ 2^63 - 1 - LARGEINT(16 Bytes) - Range: -2^127 + 1 ~ 2^127 - 1 - FLOAT(4 Bytes) - Support scientific notation - DOUBLE(8 Bytes) - Support scientific notation - DECIMAL[(precision, scale)] (16 Bytes) - Default is DECIMAL(10, 0) - precision: 1 ~ 27 - scale: 0 ~ 9 - integer part: 1 ~ 18 - fractional part: 0 ~ 9 - Not support scientific notation - DATE(3 Bytes) - Range: 0000-01-01 ~ 9999-12-31 - DATETIME(8 Bytes) - Range: 0000-01-01 00:00:00 ~ 9999-12-31 23:59:59 - CHAR[(length)] - Fixed length string. Range: 1 ~ 255. Default: 1 - VARCHAR[(length)] - Variable length string. Range: 1 ~ 65533 - HLL (1~16385 Bytes) - HLL tpye, No need to specify length. - This type can only be queried by hll_union_agg, hll_cardinality, hll_hash functions. - BITMAP - BITMAP type, No need to specify length. Represent a set of unsigned bigint numbers, the largest element could be 2^64 - 1 - QUANTILE_STATE - QUANTILE_STATE type, No need to specify length. Represents the quantile pre-aggregation result. Currently, only numerical raw data types are supported such as `int`,`float`,`double`, etc. - If the number of elements is less than 2048, the explict data is stored. - If the number of elements is greater than 2048, the intermediate result of the pre-aggregation of the TDigest algorithm is stored. - - ``` - agg_type: Aggregation type. If not specified, the column is key column. Otherwise, the column is value column. - - * SUM、MAX、MIN、REPLACE - * HLL_UNION: Only for HLL type - * REPLACE_IF_NOT_NULL: The meaning of this aggregation type is that substitution will occur if and only if the newly imported data is a non-null value. If the newly imported data is null, Doris will still retain the original value. Note: if NOT NULL is specified in the REPLACE_IF_NOT_NULL column when the user creates the table, Doris will convert it to NULL and will not report an error to the user. Users can leverage this aggregate type to achieve importing some of columns .**It should be noted here that the default value should be NULL, not an empty string. 
If it is an empty string, you should replace it with an empty string**. - * BITMAP_UNION: Only for BITMAP type - * QUANTILE_UNION: Only for QUANTILE_STATE type - Allow NULL: Default is NOT NULL. NULL value should be represented as `\N` in load source file. - - Notice: - - The origin value of BITMAP_UNION column should be TINYINT, SMALLINT, INT, BIGINT. - - The origin value of QUANTILE_UNION column should be a numeric type such as TINYINT, INT, FLOAT, DOUBLE, DECIMAL, etc. -2. index_definition - Syntax: - `INDEX index_name (col_name[, col_name, ...]) [USING BITMAP] COMMENT 'xxxxxx'` - Explain: - index_name: index name - col_name: column name - Notice: - Only support BITMAP index in current version, BITMAP can only apply to single column -3. ENGINE type - Default is olap. Options are: olap, mysql, broker, hive, iceberg - 1) For mysql, properties should include: - - ``` - PROPERTIES ( - "host" = "mysql_server_host", - "port" = "mysql_server_port", - "user" = "your_user_name", - "password" = "your_password", - "database" = "database_name", - "table" = "table_name" - ) - ``` - - Notice: - "table_name" is the real table name in MySQL database. - table_name in CREATE TABLE stmt is table is Doris. They can be different or same. - MySQL table created in Doris is for accessing data in MySQL database. - Doris does not maintain and store any data from MySQL table. - - 2) For broker, properties should include: - - ``` - PROPERTIES ( - "broker_name" = "broker_name", - "path" = "file_path1[,file_path2]", - "column_separator" = "value_separator" - "line_delimiter" = "value_delimiter" - ) - ``` - - ``` - BROKER PROPERTIES( - "username" = "name", - "password" = "password" - ) - ``` - - For different broker, the broker properties are different - Notice: - Files name in "path" is separated by ",". If file name includes ",", use "%2c" instead. If file name includes "%", use "%25" instead. - Support CSV and Parquet. Support GZ, BZ2, LZ4, LZO(LZOP) - 3) For hive, properties should include: - ``` - PROPERTIES ( - "database" = "hive_db_name", - "table" = "hive_table_name", - "hive.metastore.uris" = "thrift://127.0.0.1:9083" - ) - ``` - "database" is the name of the database corresponding to the hive table, "table" is the name of the hive table, and "hive.metastore.uris" is the hive metastore service address. - - 4) For iceberg, properties should include: - ``` - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.table" = "iceberg_table_name", - "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ) - - ``` - database is the name of the database corresponding to Iceberg. - table is the name of the table corresponding to Iceberg. - hive.metastore.uris is the address of the hive metastore service. - catalog.type defaults to HIVE_CATALOG. Currently, only HIVE_CATALOG is supported, more Iceberg catalog types will be supported later. - -4. key_desc - Syntax: - key_type(k1[,k2 ...]) - Explain: - Data is order by specified key columns. And has different behaviors for different key desc. - AGGREGATE KEY: - value columns will be aggregated is key columns are same. - UNIQUE KEY: - The new incoming rows will replace the old rows if key columns are same. - DUPLICATE KEY: - All incoming rows will be saved. - the default key_type is DUPLICATE KEY, and key columns are first 36 bytes of the columns in define order. - If the number of columns in the first 36 is less than 3, the first 3 columns will be used. 
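As a minimal sketch of the UNIQUE KEY model described above (the table and column names here are illustrative, not taken from the original examples), rows that share the same key columns replace the previously loaded row:

```
-- hypothetical UNIQUE KEY table: rows with identical (k1, k2)
-- keep only the most recently loaded v1/v2 values
CREATE TABLE example_db.table_unique
(
    k1 BIGINT,
    k2 VARCHAR(32),
    v1 VARCHAR(2048),
    v2 INT
)
ENGINE=olap
UNIQUE KEY(k1, k2)
DISTRIBUTED BY HASH(k1) BUCKETS 32;
```

Note that, consistent with the rule below, the value columns v1 and v2 carry no aggregation type because the table is not an AGGREGATE KEY table.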
- NOTICE: - Except for AGGREGATE KEY, no need to specify aggregation type for value columns. -5. partition_desc - Currently, both RANGE and LIST partitioning methods are supported. - 5.1 RANGE partition - RANGE Partition has two ways to use: - 1) LESS THAN - Syntax: - - ``` - PARTITION BY RANGE (k1, k2, ...) - ( - PARTITION partition_name1 VALUES LESS THAN MAXVALUE|("value1", "value2", ...), - PARTITION partition_name2 VALUES LESS THAN MAXVALUE|("value1", "value2", ...) - ... - ) - ``` - - Explain: - Use the specified key column and the specified range of values for partitioning. - 1) Partition name only support [A-z0-9_] - 2) Partition key column's type should be: - TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME - 3) The range is [closed, open). And the lower bound of first partition is MIN VALUE of specified column type. - 4) NULL values should be save in partition which includes MIN VALUE. - 5) Support multi partition columns, the the default partition value is MIN VALUE. - 2)Fixed Range - Syntax: - ``` - PARTITION BY RANGE (k1, k2, k3, ...) - ( - PARTITION partition_name1 VALUES [("k1-lower1", "k2-lower1", "k3-lower1",...), ("k1-upper1", "k2-upper1", "k3-upper1", ...)), - PARTITION partition_name2 VALUES [("k1-lower1-2", "k2-lower1-2", ...), ("k1-upper1-2", MAXVALUE, )) - "k3-upper1-2", ... - ) - ``` - Explain: - 1)The Fixed Range is more flexible than the LESS THAN, and the left and right intervals are completely determined by the user. - 2)Others are consistent with LESS THAN. - - 5.2 LIST partition - LIST partition is divided into single column partition and multi-column partition - 1) Single column partition - Syntax. - - ``` - PARTITION BY LIST(k1) - ( - PARTITION partition_name1 VALUES IN ("value1", "value2", ...) , - PARTITION partition_name2 VALUES IN ("value1", "value2", ...) - ... - ) - ``` - - Explain: - Use the specified key column and the formulated enumeration value for partitioning. - 1) Partition name only support [A-z0-9_] - 2) Partition key column's type should be: - BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME, CHAR, VARCHAR - 3) Partition is a collection of enumerated values, partition values cannot be duplicated between partitions - 4) NULL values cannot be imported - 5) partition values cannot be defaulted, at least one must be specified - - 2) Multi-column partition - Syntax. - - ``` - PARTITION BY LIST(k1, k2) - ( - PARTITION partition_name1 VALUES IN (("value1", "value2"), ("value1", "value2"), ...) , - PARTITION partition_name2 VALUES IN (("value1", "value2"), ("value1", "value2"), ...) - ... - ) - ``` - - Explain: - 1) the partition of a multi-column partition is a collection of tuple enumeration values - 2) The number of tuple values per partition must be equal to the number of columns in the partition - 3) The other partitions are synchronized with the single column partition - -6. distribution_desc - 1) Hash - Syntax: - `DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]` - Explain: - Hash bucketing using the specified key column. - 2) Random - Syntax: - `DISTRIBUTED BY RANDOM [BUCKETS num]` - Explain: - Use random numbers for bucketing. - Suggestion: It is recommended to use random bucketing when there is no suitable key for hash bucketing to make the data of the table evenly distributed. - -7. PROPERTIES - 1) If ENGINE type is olap. 
User can specify storage medium, cooldown time and replication number: - - ``` - PROPERTIES ( - "storage_medium" = "[SSD|HDD]", - ["storage_cooldown_time" = "yyyy-MM-dd HH:mm:ss"], - ["remote_storage_resource" = "xxx"], - ["remote_storage_cooldown_time" = "yyyy-MM-dd HH:mm:ss"], - ["replication_num" = "3"], - ["replication_allocation" = "xxx"] - ) - ``` - - storage_medium: SSD or HDD, The default initial storage media can be specified by `default_storage_medium= XXX` in the fe configuration file `fe.conf`, or, if not, by default, HDD. - Note: when FE configuration 'enable_strict_storage_medium_check' is' True ', if the corresponding storage medium is not set in the cluster, the construction clause 'Failed to find enough host in all backends with storage medium is SSD|HDD'. - storage_cooldown_time: If storage_medium is SSD, data will be automatically moved to HDD when timeout. - Default is 30 days. - Format: "yyyy-MM-dd HH:mm:ss" - remote_storage_resource: The remote storage resource name, which needs to be used in conjunction with the storage_cold_medium parameter. - remote_storage_cooldown_time: Used in conjunction with remote_storage_resource. Indicates the expiration time of the partition stored locally. - Does not expire by default. Must be later than storage_cooldown_time if used with it. - The format is: "yyyy-MM-dd HH:mm:ss" - replication_num: Replication number of a partition. Default is 3. - replication_allocation: Specify the distribution of replicas according to the resource tag. - - If table is not range partitions. This property takes on Table level. Or it will takes on Partition level. - User can specify different properties for different partition by `ADD PARTITION` or `MODIFY PARTITION` statements. - 2) If Engine type is olap, user can set bloom filter index for column. - Bloom filter index will be used when query contains `IN` or `EQUAL`. - Bloom filter index support key columns with type except TINYINT FLOAT DOUBLE, also support value with REPLACE aggregation type. - - ``` - PROPERTIES ( - "bloom_filter_columns"="k1,k2,k3" - ) - ``` - - 3) For Colocation Join: - - ``` - PROPERTIES ( - "colocate_with"="table1" - ) - ``` - - 4) if you want to use the dynamic partitioning feature, specify it in properties. Note: Dynamic partitioning only supports RANGE partitions - - ``` - PROPERTIES ( - "dynamic_partition.enable" = "true|false", - "dynamic_partition.time_unit" = "HOUR|DAY|WEEK|MONTH", - "dynamic_partition.end" = "${integer_value}", - "dynamic_partition.prefix" = "${string_value}", - "dynamic_partition.buckets" = "${integer_value} - ) - ``` - - dynamic_partition.enable: specifies whether dynamic partitioning at the table level is enabled - dynamic_partition.time_unit: used to specify the time unit for dynamically adding partitions, which can be selected as HOUR, DAY, WEEK, and MONTH. - Attention: When the time unit is HOUR, the data type of partition column cannot be DATE. 
- dynamic_partition.end: used to specify the number of partitions created in advance - dynamic_partition.prefix: used to specify the partition name prefix to be created, such as the partition name prefix p, automatically creates the partition name p20200108 - dynamic_partition.buckets: specifies the number of partition buckets that are automatically created - dynamic_partition.create_history_partition: specifies whether create history partitions, default value is false - dynamic_partition.history_partition_num: used to specify the number of history partitions when enable create_history_partition - dynamic_partition.reserved_history_periods: Used to specify the range of reserved history periods - - ``` - 5) You can create multiple Rollups in bulk when building a table - grammar: - ``` - ROLLUP (rollup_name (column_name1, column_name2, ...) - [FROM from_index_name] - [PROPERTIES ("key"="value", ...)],...) - ``` - - 6) if you want to use the inmemory table feature, specify it in properties - - ``` - PROPERTIES ( - "in_memory"="true" - ) - ``` -## example - -1. Create an olap table, distributed by hash, with aggregation type. - - ``` - CREATE TABLE example_db.table_hash - ( - k1 BOOLEAN, - k2 TINYINT, - k3 DECIMAL(10, 2) DEFAULT "10.5", - v1 CHAR(10) REPLACE, - v2 INT SUM - ) - ENGINE=olap - AGGREGATE KEY(k1, k2, k3) - COMMENT "my first doris table" - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` - -2. Create an olap table, distributed by hash, with aggregation type. Also set storage medium and cooldown time. - - ``` - CREATE TABLE example_db.table_hash - ( - k1 BIGINT, - k2 LARGEINT, - v1 VARCHAR(2048) REPLACE, - v2 SMALLINT SUM DEFAULT "10" - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH (k1, k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", - "storage_cooldown_time" = "2015-06-04 00:00:00" - ); - ``` - -3. Create an olap table, distributed by hash, with aggregation type. Also set storage medium and cooldown time. - Setting up remote storage resource and cold data storage media. - ``` - CREATE TABLE example_db.table_hash - ( - k1 BIGINT, - k2 LARGEINT, - v1 VARCHAR(2048) REPLACE, - v2 SMALLINT SUM DEFAULT "10" - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH (k1, k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", - "storage_cooldown_time" = "2015-06-04 00:00:00", - "remote_storage_resource" = "remote_s3", - "remote_storage_cooldown_time" = "2015-12-04 00:00:00" - ); - ``` - -4. Create an olap table, with range partitioned, distributed by hash. Records with the same key exist at the same time, set the initial storage medium and cooling time, use default column storage. - - 1) LESS THAN - - ``` - CREATE TABLE example_db.table_range - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1) - ( - PARTITION p1 VALUES LESS THAN ("2014-01-01"), - PARTITION p2 VALUES LESS THAN ("2014-06-01"), - PARTITION p3 VALUES LESS THAN ("2014-12-01") - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", "storage_cooldown_time" = "2015-06-04 00:00:00" - ); - ``` - - Explain: - This statement will create 3 partitions: - - ``` - ( { MIN }, {"2014-01-01"} ) - [ {"2014-01-01"}, {"2014-06-01"} ) - [ {"2014-06-01"}, {"2014-12-01"} ) - ``` - - Data outside these ranges will not be loaded. 
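If a later time range is needed afterwards, a LESS THAN partition can be appended with ADD PARTITION; the following is a hedged sketch against the table_range example above, where the partition name p4 and its upper bound are purely illustrative:

```
-- appends a fourth range [2014-12-01, 2015-06-01) to table_range
ALTER TABLE example_db.table_range
ADD PARTITION p4 VALUES LESS THAN ("2015-06-01");
```

Until such a partition exists, rows at or beyond "2014-12-01" are rejected, which is what the note above means by data outside these ranges not being loaded.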
- - 2) Fixed Range - ``` - CREATE TABLE table_range - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1, k2, k3) - ( - PARTITION p1 VALUES [("2014-01-01", "10", "200"), ("2014-01-01", "20", "300")), - PARTITION p2 VALUES [("2014-06-01", "100", "200"), ("2014-07-01", "100", "300")) - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD" - ); - ``` -5. Create an olap table, with list partitioned, distributed by hash. Records with the same key exist at the same time, set the initial storage medium and cooling time, use default column storage. - - 1) Single column partition - - ``` - CREATE TABLE example_db.table_list - ( - k1 INT, - k2 VARCHAR(128), - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY LIST (k1) - ( - PARTITION p1 VALUES IN ("1", "2", "3"), - PARTITION p2 VALUES IN ("4", "5", "6"), - PARTITION p3 VALUES IN ("7", "8", "9") - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", "storage_cooldown_time" = "2022-06-04 00:00:00" - ); - ``` - - Explain: - This statement will divide the data into 3 partitions as follows. - - ``` - ("1", "2", "3") - ("4", "5", "6") - ("7", "8", "9") - ``` - - Data that does not fall within these partition enumeration values will be filtered as illegal data - - 2) Multi-column partition - - ``` - CREATE TABLE example_db.table_list - ( - k1 INT, - k2 VARCHAR(128), - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY LIST (k1, k2) - ( - PARTITION p1 VALUES IN (("1", "beijing"), ("1", "shanghai")), - PARTITION p2 VALUES IN (("2", "beijing"), ("2", "shanghai")), - PARTITION p3 VALUES IN (("3", "beijing"), ("3", "shanghai")) - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", "storage_cooldown_time" = "2022-06-04 00:00:00" - ); - ``` - - Explain: - This statement will divide the data into 3 partitions as follows. - - ``` - (("1", "beijing"), ("1", "shanghai")) - (("2", "beijing"), ("2", "shanghai")) - (("3", "beijing"), ("3", "shanghai")) - ``` - - Data that is not within these partition enumeration values will be filtered as illegal data - -6. Create a mysql table - 6.1 Create MySQL table directly from external table information - ``` - CREATE EXTERNAL TABLE example_db.table_mysql - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - k4 VARCHAR(2048), - k5 DATETIME - ) - ENGINE=mysql - PROPERTIES - ( - "host" = "127.0.0.1", - "port" = "8239", - "user" = "mysql_user", - "password" = "mysql_passwd", - "database" = "mysql_db_test", - "table" = "mysql_table_test" - ) - ``` - - 6.2 Create MySQL table with external ODBC catalog resource - ``` - CREATE EXTERNAL RESOURCE "mysql_resource" - PROPERTIES - ( - "type" = "odbc_catalog", - "user" = "mysql_user", - "password" = "mysql_passwd", - "host" = "127.0.0.1", - "port" = "8239" - ); - - CREATE EXTERNAL TABLE example_db.table_mysql - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - k4 VARCHAR(2048), - k5 DATETIME - ) - ENGINE=mysql - PROPERTIES - ( - "odbc_catalog_resource" = "mysql_resource", - "database" = "mysql_db_test", - "table" = "mysql_table_test" - ); - ``` - -7. 
Create a broker table, with file on HDFS, line delimit by "|", column separated by "\n" - - ``` - CREATE EXTERNAL TABLE example_db.table_broker ( - k1 DATE, - k2 INT, - k3 SMALLINT, - k4 VARCHAR(2048), - k5 DATETIME - ) - ENGINE=broker - PROPERTIES ( - "broker_name" = "hdfs", - "path" = "hdfs://hdfs_host:hdfs_port/data1,hdfs://hdfs_host:hdfs_port/data2,hdfs://hdfs_host:hdfs_port/data3%2c4", - "column_separator" = "|", - "line_delimiter" = "\n" - ) - BROKER PROPERTIES ( - "username" = "hdfs_user", - "password" = "hdfs_password" - ); - ``` - -8. Create table will HLL column - - ``` - CREATE TABLE example_db.example_table - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 HLL HLL_UNION, - v2 HLL HLL_UNION - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` - -9. Create a table will BITMAP_UNION column - - ``` - CREATE TABLE example_db.example_table - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 BITMAP BITMAP_UNION, - v2 BITMAP BITMAP_UNION - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` -10. Create a table with QUANTILE_UNION column (the origin value of **v1** and **v2** columns must be **numeric** types) - - ``` - CREATE TABLE example_db.example_table - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 QUANTILE_STATE QUANTILE_UNION, - v2 QUANTILE_STATE QUANTILE_UNION - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` -11. Create 2 colocate join table. - - ``` - CREATE TABLE `t1` ( - `id` int(11) COMMENT "", - `value` varchar(8) COMMENT "" - ) ENGINE=OLAP - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "group1" - ); - CREATE TABLE `t2` ( - `id` int(11) COMMENT "", - `value` varchar(8) COMMENT "" - ) ENGINE=OLAP - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "group1" - ); - ``` - -12. Create a broker table, with file on BOS. - - ``` - CREATE EXTERNAL TABLE example_db.table_broker ( - k1 DATE - ) - ENGINE=broker - PROPERTIES ( - "broker_name" = "bos", - "path" = "bos://my_bucket/input/file", - ) - BROKER PROPERTIES ( - "bos_endpoint" = "http://bj.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy" - ); - ``` - -13. Create a table with a bitmap index - - ``` - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 CHAR(10) REPLACE, - v2 INT SUM, - INDEX k1_idx (k1) USING BITMAP COMMENT 'xxxxxx' - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - COMMENT "my first doris table" - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` - -14. Create a dynamic partitioning table (dynamic partitioning needs to be enabled in FE configuration), which creates partitions 3 days in advance every day. For example, if today is' 2020-01-08 ', partitions named 'p20200108', 'p20200109', 'p20200110', 'p20200111' will be created. 
- - ``` - [types: [DATE]; keys: [2020-01-08]; ‥types: [DATE]; keys: [2020-01-09]; ) - [types: [DATE]; keys: [2020-01-09]; ‥types: [DATE]; keys: [2020-01-10]; ) - [types: [DATE]; keys: [2020-01-10]; ‥types: [DATE]; keys: [2020-01-11]; ) - [types: [DATE]; keys: [2020-01-11]; ‥types: [DATE]; keys: [2020-01-12]; ) - ``` - - ``` - CREATE TABLE example_db.dynamic_partition - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1) () - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32" - ); - ``` -15. Create a table with rollup index - ``` - CREATE TABLE example_db.rolup_index_table - ( - event_day DATE, - siteid INT DEFAULT '10', - citycode SMALLINT, - username VARCHAR(32) DEFAULT '', - pv BIGINT SUM DEFAULT '0' - ) - AGGREGATE KEY(event_day, siteid, citycode, username) - DISTRIBUTED BY HASH(siteid) BUCKETS 10 - rollup ( - r1(event_day,siteid), - r2(event_day,citycode), - r3(event_day) - ) - PROPERTIES("replication_num" = "3"); - ``` - -16. Create a inmemory table: - - ``` - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 CHAR(10) REPLACE, - v2 INT SUM, - INDEX k1_idx (k1) USING BITMAP COMMENT 'xxxxxx' - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - COMMENT "my first doris table" - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ("in_memory"="true"); - ``` - -17. Create a hive external table - ``` - CREATE TABLE example_db.table_hive - ( - k1 TINYINT, - k2 VARCHAR(50), - v INT - ) - ENGINE=hive - PROPERTIES - ( - "database" = "hive_db_name", - "table" = "hive_table_name", - "hive.metastore.uris" = "thrift://127.0.0.1:9083" - ); - ``` - -18. Specify the replica distribution of the table through replication_allocation - - ``` - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5" - ) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ( - "replication_allocation"="tag.location.group_a:1, tag.location.group_b:2" - ); - - CREATE TABLE example_db.dynamic_partition - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - PARTITION BY RANGE (k1) () - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-3", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32", - "dynamic_partition."replication_allocation" = "tag.location.group_a:3" - ); - ``` - -19. 
Create an Iceberg external table - - ``` - CREATE TABLE example_db.t_iceberg - ENGINE=ICEBERG - PROPERTIES ( - "iceberg.database" = "iceberg_db", - "iceberg.table" = "iceberg_table", - "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - -## keyword - - CREATE,TABLE diff --git a/docs/en/sql-reference/sql-statements/Data Definition/CREATE VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/CREATE VIEW.md deleted file mode 100644 index fa6e245ed7..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/CREATE VIEW.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -{ - "title": "CREATE VIEW", - "language": "en" -} ---- - - - -# CREATE VIEW -## Description - This statement is used to create a logical view - Grammar: - - CREATE VIEW [IF NOT EXISTS] - [db_name.]view_name - (column1[ COMMENT "col comment"][, column2, ...]) - AS query_stmt - - Explain: - - 1. Views are logical views without physical storage. All queries on views are equivalent to sub-queries corresponding to views. - 2. query_stmt is arbitrarily supported SQL. - -## example - - 1. Create view example_view on example_db - - CREATE VIEW example_db.example_view (k1, k2, k3, v1) - AS - SELECT c1 as k1, k2, k3, SUM(v1) FROM example_table - WHERE k1 = 20160112 GROUP BY k1,k2,k3; - - 2. Create view with comment - - CREATE VIEW example_db.example_view - ( - k1 COMMENT "first key", - k2 COMMENT "second key", - k3 COMMENT "third key", - v1 COMMENT "first value" - ) - COMMENT "my first view" - AS - SELECT c1 as k1, k2, k3, SUM(v1) FROM example_table - WHERE k1 = 20160112 GROUP BY k1,k2,k3; - -## keyword - - CREATE,VIEW - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/Colocate Join.md b/docs/en/sql-reference/sql-statements/Data Definition/Colocate Join.md deleted file mode 100644 index d54af225aa..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/Colocate Join.md +++ /dev/null @@ -1,98 +0,0 @@ ---- -{ - "title": "Colocate Join", - "language": "en" -} ---- - - - -# Colocate Join -## Description -Colocate/Local Join means that when multiple nodes are Join, there is no data movement and network transmission, and each node is only Join locally. -The premise of Join locally is to import data from the same Join Key into a fixed node according to the same rules. - -1 How To Use: - -Simply add the property colocate_with when building a table. The value of colocate_with can be set to any one of the same set of colocate tables. -However, you need to ensure that tables in the colocate_with attribute are created first. - -If you need to Colocate Join table t1 and t2, you can build tables according to the following statements: - -CREATE TABLE `t1` ( -`id` int(11) COMMENT "", -'value ` varchar (8) COMMENT "" -) ENGINE=OLAP -DUPLICATE KEY(`id`) -DISTRIBUTED BY HASH(`id`) BUCKETS 10 -PROPERTIES ( -"colocate_with" = "t1" -); - -CREATE TABLE `t2` ( -`id` int(11) COMMENT "", -'value ` varchar (8) COMMENT "" -) ENGINE=OLAP -DUPLICATE KEY(`id`) -DISTRIBUTED BY HASH(`id`) BUCKETS 10 -PROPERTIES ( -"colocate_with" = "t1" -); - -2 Colocate Join 目前的限制: - -1. Colcoate Table must be an OLAP-type table -2. The BUCKET number of tables with the same colocate_with attribute must be the same -3. The number of copies of tables with the same colocate_with attribute must be the same -4. 
Data types of DISTRIBUTED Columns for tables with the same colocate_with attribute must be the same - -3 Colocate Join's applicable scenario: - -Colocate Join is well suited for scenarios where tables are bucketed by the same field and are frequently joined on that field. - -4 FAQ: - -Q: Is Colocate Join supported across multiple tables? - -A: Yes, it is supported. - -Q: Is a Join between a Colocate table and a normal table supported? - -A: Yes, it is supported. - -Q: Does the Colocate table support Join with non-bucket Key? - -A: Supported: a Join that does not meet the Colocate Join criteria will use Shuffle Join or Broadcast Join - -Q: How do you determine that a Join is executed as a Colocate Join? - -A: The child node of Hash Join in the result of explain is Colocate Join if it is OlapScanNode directly without Exchange Node. - -Q: How to modify the colocate_with attribute? - -A: ALTER TABLE example_db.my_table set ("colocate_with"="target_table"); - -Q: How to disable Colocate Join? - -A: set disable_colocate_join = true; disables Colocate Join, and queries will then use Shuffle Join or Broadcast Join - -## keyword - -COLOCATE, JOIN, CREATE TABLE diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP DATABASE.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP DATABASE.md deleted file mode 100644 index 891b1eb230..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/DROP DATABASE.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -{ - "title": "DROP DATABASE", - "language": "en" -} ---- - - - -# DROP DATABASE -## Description -This statement is used to delete a database. -Grammar: -DROP DATABASE [IF EXISTS] db_name; - -Explain: -1) For a period of time after executing DROP DATABASE, the deleted database can be restored through the RECOVER statement. See the RECOVER statement for details. -2) If DROP DATABASE FORCE is executed, the system will not check whether the database has unfinished transactions; the database will be deleted directly and cannot be recovered. This operation is generally not recommended. - -## example -1. Delete database db_test -DROP DATABASE db_test; - -## keyword -DROP,DATABASE - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md deleted file mode 100644 index 352258f1bf..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -{ - "title": "DROP ENCRYPTKEY", - "language": "en" -} ---- - - - -# DROP ENCRYPTKEY - -## Description - -### Syntax - -``` -DROP ENCRYPTKEY key_name -``` - -### Parameters - -> `key_name`: The name of the key to delete; it can include the database name. For example: `db1.my_key`. - -Delete a custom key. A key can only be deleted if its name matches exactly. - -Executing this command requires the user to have the `ADMIN` privileges. - -## example - -1. Delete a key.
- -``` -DROP ENCRYPTKEY my_key; -``` - -## keyword - - DROP,ENCRYPTKEY diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP INDEX.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP INDEX.md deleted file mode 100644 index ece2902003..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/DROP INDEX.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -{ - "title": "DROP INDEX", - "language": "en" -} ---- - - - -# DROP INDEX - -## description - - This statement is used to delete index from table - grammer: - DROP INDEX [IF EXISTS] index_name ON [db_name.]table_name; - -## keyword - - DROP,INDEX diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md deleted file mode 100644 index cb1f3a8603..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -{ - "title": "DROP MATERIALIZED VIEW", - "language": "en" -} ---- - - - -# DROP MATERIALIZED VIEW - -## description - This statement is used to delete a materialized view. Synchronization syntax - -syntax: - - ``` - DROP MATERIALIZED VIEW [IF EXISTS] mv_name ON table_name - ``` - -1. IF EXISTS - If the materialized view does not exist, doris will not throw an error. If this keyword is not declared, an error will be reported if the materialized view does not exist. -Ranch - -2. mv_name - The name of the materialized view to be deleted. Required. - -3. Table_name - Name of the table to which the materialized view to be deleted belongs. Required. - -## example - -Table structure is - -``` -mysql> desc all_type_table all; -+----------------+-------+----------+------+-------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+----------------+-------+----------+------+-------+---------+-------+ -| all_type_table | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | false | N/A | NONE | -| | k3 | INT | Yes | false | N/A | NONE | -| | k4 | BIGINT | Yes | false | N/A | NONE | -| | k5 | LARGEINT | Yes | false | N/A | NONE | -| | k6 | FLOAT | Yes | false | N/A | NONE | -| | k7 | DOUBLE | Yes | false | N/A | NONE | -| | | | | | | | -| k1_sumk2 | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | false | N/A | SUM | -+----------------+-------+----------+------+-------+---------+-------+ -``` - -1. Drop the materialized view named k1_sumk2 of the table all_type_table - - ``` - drop materialized view k1_sumk2 on all_type_table; - ``` - Table structure after materialized view is deleted as following: - - ``` -+----------------+-------+----------+------+-------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+----------------+-------+----------+------+-------+---------+-------+ -| all_type_table | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | false | N/A | NONE | -| | k3 | INT | Yes | false | N/A | NONE | -| | k4 | BIGINT | Yes | false | N/A | NONE | -| | k5 | LARGEINT | Yes | false | N/A | NONE | -| | k6 | FLOAT | Yes | false | N/A | NONE | -| | k7 | DOUBLE | Yes | false | N/A | NONE | -+----------------+-------+----------+------+-------+---------+-------+ - ``` - -2. 
Delete a non-existing materialized view in the table all_type_table - - ``` - drop materialized view k1_k2 on all_type_table; -ERROR 1064 (HY000): errCode = 2, detailMessage = Materialized view [k1_k2] does not exist in table [all_type_table] - ``` - - The delete request directly reports an error - -3. Delete the materialized view k1_k2 in the table all_type_table. Materialized view does not exist and no error is reported. - - ``` - drop materialized view if exists k1_k2 on all_type_table; -Query OK, 0 rows affected (0.00 sec) - ``` - - If it exists, it will be deleted; If it does not exist, no error will be reported. - -## keyword - DROP, MATERIALIZED, VIEW diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md deleted file mode 100644 index cb24b10b01..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -{ - "title": "DROP REPOSITORY", - "language": "en" -} ---- - - - -# DROP REPOSITORY -## Description -This statement is used to delete a created warehouse. Only root or superuser users can delete the warehouse. -Grammar: -DROP REPOSITORY `repo_name`; - -Explain: -1. Delete the warehouse, just delete the mapping of the warehouse in Palo, and do not delete the actual warehouse data. After deletion, you can map to the repository again by specifying the same broker and LOCATION. - -## example -1. Delete the warehouse named bos_repo: -DROP REPOSITORY `bos_repo`; - -## keyword -DROP, REPOSITORY diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md deleted file mode 100644 index 66342cada9..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -{ - "title": "DROP RESOURCE", - "language": "en" -} ---- - - - -# DROP RESOURCE - -## Description - - This statement is used to delete an existing resource. Only the root or admin user can delete resources. - - Syntax: - DROP RESOURCE 'resource_name' - - Note: ODBC/S3 resources that are in use cannot be deleted. - -## Example - - 1. Delete the Spark resource named spark0: - DROP RESOURCE 'spark0'; - - -## keyword - - DROP, RESOURCE diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP TABLE.md deleted file mode 100644 index a5d3b6a0de..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/DROP TABLE.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -{ - "title": "DROP TABLE", - "language": "en" -} ---- - - - -# DROP TABLE -## Description -This statement is used to delete the table. -Grammar: -DROP TABLE [IF EXISTS] [db_name.]table_name; - -Explain: -1) After executing DROP TABLE for a period of time, the deleted table can be restored through the RECOVER statement. See RECOVER statement for details -2) If DROP TABLE FORCE is executed, the system will not check whether the table has unfinished transactions, the table will be deleted directly and cannot be recovered, generally this operation is not recommended - -## example -1. Delete a table -DROP TABLE my_table; - -2. 
If it exists, delete the table that specifies the database -DROP TABLE IF EXISTS example_db.my_table; - -## keyword -DROP,TABLE - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/DROP VIEW.md b/docs/en/sql-reference/sql-statements/Data Definition/DROP VIEW.md deleted file mode 100644 index b34a402b4f..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/DROP VIEW.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -{ - "title": "DROP VIEW", - "language": "en" -} ---- - - - -# DROP VIEW -## Description -This statement is used to delete a logical view VIEW -Grammar: -DROP VIEW [IF EXISTS] -[db_name.]view_name; - -## example -1. If it exists, delete view example_view on example_db -DROP VIEW IF EXISTS example_db.example_view; - -## keyword -DROP,VIEW - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/HLL.md b/docs/en/sql-reference/sql-statements/Data Definition/HLL.md deleted file mode 100644 index a3f92863b2..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/HLL.md +++ /dev/null @@ -1,111 +0,0 @@ ---- -{ - "title": "HLL", - "language": "en" -} ---- - - - -# HLL -## Description -HLL is an engineering implementation based on the HyperLogLog algorithm. It is used to store the intermediate results of the HyperLog calculation process. It can only be used as the value column type of the table. -By aggregating to reduce the amount of data continuously, in order to achieve the purpose of speeding up the query, based on which an estimated result, the error is about 1%. -The HLL column is generated by other columns or data in the imported data. When imported, the hll_hash function is used to specify which column in the data is used to generate the HLL column. -It is often used to replace count distinct, and to quickly calculate UV in business by combining rollup. - -The correlation function: - -TOTAL UNION -This function is an aggregation function, which is used to calculate the cardinality estimation of all data satisfying the conditions. This function can also be used to analyze functions. It only supports the default window and does not support the window clause. - -Coach L.u RAW AGG -This function is an aggregation function that aggregates HLL type fields and returns HLL type. - -HLL_CARDINALITY(hll) -This function is used to estimate the cardinality of a single HLL sequence - -HLL_HASH(column_name) -Generate HLL column types for insert or import, see the instructions for the use of imports - -EMPTY_HLL() -Generate empty HLL column types for insert or import, see the instructions for the use of imports - -## example -1. First create a table with HLL columns -create table test( -dt date, -id int, -name char(10), -Province of char (10), -The char (1), -the European Union, -European Union -distributed by hash(id) buckets 32; - -2. Import data. See help curl for the way you import it. - - A. Generate HLL columns using columns in tables - - curl --location-trusted -uname:password -T data -H "label:load_1" -H "columns:dt, id, name, province, os, set1=hll_hash(id), set2=hll_hash(name)" - http://host/api/test_db/test/_stream_load - - B. Generate HLL columns using a column in the data - - curl --location-trusted -uname:password -T data -H "label:load_1" -H "columns:dt, id, name, province, sex, cuid, os, set1=hll_hash(cuid), set2=hll_hash(os)" - http://host/api/test_db/test/_stream_load - -3. 
- -3. There are three common ways of aggregating data (querying the base table directly without aggregation may be about as fast as using APPROX_COUNT_DISTINCT directly): - -a. Create a rollup so that the HLL column is pre-aggregated: -alter table test add rollup test_rollup(dt, set1); - -b. Create another table dedicated to computing UV, and insert data into it: - -create table test_uv( -dt date, -uv_set hll hll_union) -distributed by hash(dt) buckets 32; - -insert into test_uv select dt, set1 from test; - -c. Create another table dedicated to computing UV, then insert and generate the HLL column from another non-hll column of test through hll_hash: - -create table test_uv( -dt date, -id_set hll hll_union) -distributed by hash(dt) buckets 32; - -insert into test_uv select dt, hll_hash(id) from test; - -4. Query. The original value of an HLL column cannot be queried directly; it can only be queried through the matching functions. - -a. Calculate the total UV -select HLL_UNION_AGG(uv_set) from test_uv; - -b. Calculate the UV of each day -select dt, HLL_CARDINALITY(uv_set) from test_uv; - -c. Find the aggregate value of set1 in the test table -select dt, HLL_CARDINALITY(uv) from (select dt, HLL_RAW_AGG(set1) as uv from test group by dt) tmp; -select dt, HLL_UNION_AGG(set1) as uv from test group by dt; - -## keyword -HLL diff --git a/docs/en/sql-reference/sql-statements/Data Definition/RECOVER.md b/docs/en/sql-reference/sql-statements/Data Definition/RECOVER.md deleted file mode 100644 index d2f1f35019..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/RECOVER.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -{ - "title": "RECOVER", - "language": "en" -} ---- - - - -# RECOVER -## Description -This statement is used to restore previously deleted databases, tables, or partitions -Grammar: -1) Recover a database -RECOVER DATABASE db_name; -2) Recover a table -RECOVER TABLE [db_name.]table_name; -3) Recover a partition -RECOVER PARTITION partition_name FROM [db_name.]table_name; - -Explain: -1. This operation can only recover meta-information deleted within the previous period of time. The default is 1 day. (This can be configured with the `catalog_trash_expire_second` parameter in fe.conf.) -2. If new meta-information with the same name and type is created after the meta-information is deleted, the previously deleted meta-information cannot be restored. - -## example -1. Restore the database named example_db -RECOVER DATABASE example_db; - -2. Restore the table named example_tbl -RECOVER TABLE example_db.example_tbl; - -3. Restore the partition named p1 in example_tbl -RECOVER PARTITION p1 FROM example_tbl; - -## keyword -RECOVER - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md b/docs/en/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md deleted file mode 100644 index 805b4a06d4..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "REFRESH DATABASE", - "language": "en" -} ---- - - - -# REFRESH DATABASE - -## Description - - This statement is used to synchronize the remote Iceberg database. It will delete and rebuild the Iceberg tables under the current Doris database, leaving the non-Iceberg tables unaffected. - Syntax: - REFRESH DATABASE db_name; - - Instructions: - 1) Valid only for an Iceberg database mounted in Doris.
- -## Example - - 1) Refresh the database iceberg_test_db - REFRESH DATABASE iceberg_test_db; - -## keyword - - REFRESH,DATABASE diff --git a/docs/en/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md deleted file mode 100644 index 69455ca043..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "REFRESH TABLE", - "language": "en" -} ---- - - - -# REFRESH TABLE - -## Description - - This statement is used to synchronize a remote Iceberg table. It will delete and rebuild the corresponding external table currently in Doris. - Syntax: - REFRESH TABLE tbl_name; - - Instructions: - 1) Valid only for an Iceberg table mounted in Doris. - -## Example - - 1) Refresh the table iceberg_tbl - REFRESH TABLE iceberg_tbl; - -## keyword - - REFRESH,TABLE diff --git a/docs/en/sql-reference/sql-statements/Data Definition/RESTORE.md b/docs/en/sql-reference/sql-statements/Data Definition/RESTORE.md deleted file mode 100644 index 5eb9dee253..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/RESTORE.md +++ /dev/null @@ -1,87 +0,0 @@ ---- -{ - "title": "RESTORE", - "language": "en" -} ---- - - - -# RESTORE -## Description -1. RESTORE -This statement is used to restore the data previously backed up by the BACKUP command to the specified database. This command is an asynchronous operation. After successful submission, you need to check progress through the SHOW RESTORE command. Only restoring tables of OLAP type is supported. -Grammar: -RESTORE SNAPSHOT [db_name].{snapshot_name} -FROM `repository_name` -[ON|EXCLUDE] ( -`table_name` [PARTITION (`p1`, ...)] [AS `tbl_alias`], -... -) -PROPERTIES ("key"="value", ...); - -Explain: -1. Only one BACKUP or RESTORE task can be performed under the same database. -2. The ON clause identifies the tables and partitions that need to be restored. If no partition is specified, all partitions of the table are restored by default. The specified tables and partitions must already exist in the repository backup. -3. The EXCLUDE clause identifies the tables and partitions that do not need to be restored. All partitions of all tables in the repository except the specified tables or partitions will be restored. -4. The backed-up tables in the repository can be restored to new tables through the AS clause, but the new table name cannot already exist in the database. Partition names cannot be changed. -5. The backed-up tables in the repository can be restored to replace the tables of the same name in the database, but the table structure of the two tables must be completely consistent. Table structure includes: table name, columns, partitions, Rollup and so on. -6. Partitions of the table to be restored can be specified, and the system checks whether the partition Range or List matches. -7. PROPERTIES currently supports the following attributes: -"backup_timestamp" = "2018-05-04-16-45-08": Specifies which time version of the corresponding backup to restore; this field is mandatory. This information can be obtained through the `SHOW SNAPSHOT ON repo;` statement. -"replication_num" = "3": Specifies the number of replicas of the restored table or partition. The default is 3. If an existing table or partition is restored, the number of replicas must be the same as that of the existing table or partition. At the same time, there must be enough hosts to accommodate the replicas. -"timeout" = "3600": Task timeout, in seconds. The default is one day. -"meta_version" = 40: Use the specified meta_version to read the previously backed up metadata. Note that, as a temporary solution, this parameter is only used to restore data backed up by an older version of Doris. The latest version of backup data already contains the meta version, so there is no need to specify it.
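Because RESTORE is asynchronous, a typical workflow is to look up the snapshot in the repository first, submit the RESTORE statement (see the examples below), and then poll the job; the following is a minimal sketch assuming the repository `example_repo` and database `example_db1` used in the examples:

```
-- 1. find the snapshot name and backup_timestamp recorded in the repository
SHOW SNAPSHOT ON example_repo;

-- 2. after submitting the RESTORE statement, track its progress
SHOW RESTORE FROM example_db1;

-- 3. a running restore job can be aborted if necessary
CANCEL RESTORE FROM example_db1;
```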
- -## example -1. Restore the backed-up table backup_tbl in snapshot_1 from example_repo to database example_db1, with the time version "2018-05-04-16-45-08". Restore it as a single replica: -RESTORE SNAPSHOT example_db1.`snapshot_1` -FROM `example_repo` -ON ( `backup_tbl` ) -PROPERTIES -( -"backup_timestamp"="2018-05-04-16-45-08", -"replication_num" = "1" -); - -2. Restore partitions p1 and p2 of table backup_tbl in snapshot_2, and table backup_tbl2 renamed to new_tbl, from example_repo to database example_db1. The time version is "2018-05-04-17-11-01". By default, three replicas are restored: -RESTORE SNAPSHOT example_db1.`snapshot_2` -FROM `example_repo` -ON -( -`backup_tbl` PARTITION (`p1`, `p2`), -`backup_tbl2` AS `new_tbl` -) -PROPERTIES -( -"backup_timestamp"="2018-05-04-17-11-01" -); - -3. Restore all partitions of all tables in snapshot_3, except backup_tbl, from example_repo to database example_db1, with the time version "2018-05-04-18-12-18". -RESTORE SNAPSHOT example_db1.`snapshot_3` -FROM `example_repo` -EXCLUDE ( `backup_tbl` ) -PROPERTIES -( - "backup_timestamp"="2018-05-04-18-12-18" -); -## keyword -RESTORE - diff --git a/docs/en/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md b/docs/en/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md deleted file mode 100644 index 2473f98971..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -{ - "title": "SHOW ENCRYPTKEYS", - "language": "en" -} ---- - - - -# SHOW ENCRYPTKEYS - -## Description - -### Syntax - -``` -SHOW ENCRYPTKEYS [IN|FROM db] [LIKE 'key_pattern'] -``` - -### Parameters - ->`db`: the name of the database to query ->`key_pattern`: parameter used to filter key names - -View all custom keys under a database. If the user specifies a database, the keys of that database are shown; otherwise the database of the current session is queried. - -You need to have `ADMIN` privileges for this database. - -## Example - - ``` - mysql> SHOW ENCRYPTKEYS; - +-------------------+-------------------+ - | EncryptKey Name | EncryptKey String | - +-------------------+-------------------+ - | example_db.my_key | ABCD123456789 | - +-------------------+-------------------+ - 1 row in set (0.00 sec) - - mysql> SHOW ENCRYPTKEYS FROM example_db LIKE "%my%"; - +-------------------+-------------------+ - | EncryptKey Name | EncryptKey String | - +-------------------+-------------------+ - | example_db.my_key | ABCD123456789 | - +-------------------+-------------------+ - 1 row in set (0.00 sec) - ``` - -## keyword - - SHOW,ENCRYPTKEYS diff --git a/docs/en/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md b/docs/en/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md deleted file mode 100644 index 8ed9d60f55..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -{ - "title": "SHOW RESOURCES", - "language": "en" -} ---- - - - -# SHOW RESOURCES - -## Description - - This statement is used to display the resources that the user has permission to use.
- Ordinary users can only display the resources with permission, while root or admin users can display all the resources. - - Syntax: - - SHOW RESOURCES - [ - WHERE - [NAME [ = "your_resource_name" | LIKE "name_matcher"]] - [RESOURCETYPE = ["[spark|odbc_catalog|s3]"]] - ] - [ORDER BY ...] - [LIMIT limit][OFFSET offset]; - - Explain: - 1) If use NAME LIKE, the name of resource is matched to show. - 2) If use NAME =, the specified name is exactly matched. - 3) RESOURCETYPE is specified, the corresponding rerouce type is matched. - 4) Use ORDER BY to sort any combination of columns. - 5) If LIMIT is specified, limit matching records are displayed. Otherwise, it is all displayed. - 6) If OFFSET is specified, the query results are displayed starting with the offset offset. The offset is 0 by default. - -## Example - - 1. Display all resources that the current user has permissions on - SHOW RESOURCES; - - 2. Show the specified resource, the name contains the string "20140102", and displays 10 properties - SHOW RESOURCES WHERE NAME LIKE "2014_01_02" LIMIT 10; - - 3. Display the specified resource, specify the name as "20140102" and sort in descending order by key - SHOW RESOURCES WHERE NAME = "20140102" ORDER BY `KEY` DESC; - - -## keyword - - SHOW RESOURCES, RESOURCES diff --git a/docs/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md b/docs/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md deleted file mode 100644 index 247f129d15..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -{ - "title": "TRUNCATE TABLES", - "language": "en" -} ---- - - - -# TRUNCATE TABLES -## Description -This statement is used to empty the data of the specified table and partition -Grammar: - -TRUNCATE TABLE [db.]tbl[ PARTITION(p1, p2, ...)]; - -Explain: -1. The statement empties the data, but retains the table or partition. -2. Unlike DELETE, this statement can only empty the specified tables or partitions as a whole, without adding filtering conditions. -3. Unlike DELETE, using this method to clear data will not affect query performance. -4. The data deleted by this operation is not recoverable. -5. When using this command, the table state should be NORMAL, i.e. SCHEMA CHANGE operations are not allowed. - -## example - -1. Clear the table TBL under example_db - -TRUNCATE TABLE example_db.tbl; - -2. P1 and P2 partitions of clearing TABLE tbl - -TRUNCATE TABLE tbl PARTITION(p1, p2); - -## keyword -TRUNCATE,TABLE diff --git a/docs/en/sql-reference/sql-statements/Data Definition/create-function.md b/docs/en/sql-reference/sql-statements/Data Definition/create-function.md deleted file mode 100644 index 7e29591fe7..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/create-function.md +++ /dev/null @@ -1,152 +0,0 @@ ---- -{ - "title": "CREATE FUNCTION", - "language": "en" -} ---- - - - -# CREATE FUNCTION -## Description -### Syntax - -``` -CREATE [AGGREGATE] [ALIAS] FUNCTION function_name - (arg_type [, ...]) - [RETURNS ret_type] - [INTERMEDIATE inter_type] - [WITH PARAMETER(param [,...]) AS origin_function] - [PROPERTIES ("key" = "value" [, ...]) ] -``` - -### Parameters - -> `AGGREGATE`: If this is the case, it means that the created function is an aggregate function. -> -> `ALIAS`: If this is the case, it means that the created function is an alias function. -> -> If the above two items are not present, it means that the created function is a scalar function. 
-> -> `Function_name`: To create the name of the function, you can include the name of the database. For example: `db1.my_func'. -> -> `arg_type`: The parameter type of the function is the same as the type defined at the time of table building. Variable-length parameters can be represented by `,...`. If it is a variable-length type, the type of the variable-length part of the parameters is the same as the last non-variable-length parameter type. -> **NOTICE**: `ALIAS FUNCTION` variable-length parameters are not supported, and there is at least one parameter. In particular, the type `ALL` refers to any data type and can only be used for `ALIAS FUNCTION`. -> -> `ret_type`: Required for creating a new function. This parameter is not required if you are aliasing an existing function. -> -> `inter_type`: A data type used to represent the intermediate stage of an aggregate function. -> -> `param`: The parameter used to represent the alias function, containing at least one. -> -> `origin_function`: Used to represent the original function corresponding to the alias function. -> -> `properties`: Used to set properties related to aggregate function and scalar function. Properties that can be set include -> -> "Object_file": Custom function dynamic library URL path, currently only supports HTTP/HTTPS protocol, this path needs to remain valid throughout the life cycle of the function. This option is mandatory -> -> "symbol": Function signature of scalar functions for finding function entries from dynamic libraries. This option is mandatory for scalar functions -> -> "init_fn": Initialization function signature of aggregate function. Necessary for aggregation functions -> -> "update_fn": Update function signature of aggregate function. Necessary for aggregation functions -> -> "merge_fn": Merge function signature of aggregate function. Necessary for aggregation functions -> -> "serialize_fn": Serialized function signature of aggregate function. For aggregation functions, it is optional, and if not specified, the default serialization function will be used -> -> "finalize_fn": A function signature that aggregates functions to obtain the final result. For aggregation functions, it is optional. If not specified, the default fetch result function will be used. -> -> "md5": The MD5 value of the function dynamic link library, which is used to verify that the downloaded content is correct. This option is optional -> -> "prepare_fn": Function signature of the prepare function for finding the entry from the dynamic library. This option is optional for custom functions -> -> "close_fn": Function signature of the close function for finding the entry from the dynamic library. This option is optional for custom functions -> "type": Function type, RPC for remote udf, NATIVE for c++ native udf - - - -This statement creates a custom function. Executing this command requires that the user have `ADMIN` privileges. - -If the `function_name` contains the database name, the custom function will be created in the corresponding database, otherwise the function will be created in the database where the current session is located. The name and parameters of the new function cannot be the same as functions already existing in the current namespace, otherwise the creation will fail. But only with the same name and different parameters can the creation be successful. - -## example - -1. 
Create a custom scalar function - - ``` - CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES ( - "symbol" = "_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_", - "object_file" ="http://host:port/libmyadd.so" - ); - ``` -2. Create a custom scalar function with prepare/close functions - - ``` - CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES ( - "symbol" = "_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_", - "prepare_fn" = "_ZN9doris_udf14AddUdf_prepareEPNS_15FunctionContextENS0_18FunctionStateScopeE", - "close_fn" = "_ZN9doris_udf12AddUdf_closeEPNS_15FunctionContextENS0_18FunctionStateScopeE", - "object_file" = "http://host:port/libmyadd.so" - ); - ``` - -3. Create a custom aggregation function - - ``` - CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES ( - "init_fn"="_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE", - "update_fn"="_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE", - "merge_fn"="_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_", - "finalize_fn"="_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE", - "object_file"="http://host:port/libudasample.so" - ); - ``` - -4. Create a scalar function with variable length parameters - - ``` - CREATE FUNCTION strconcat(varchar, ...) RETURNS varchar properties ( - "symbol" = "_ZN9doris_udf6StrConcatUdfEPNS_15FunctionContextERKNS_6IntValES4_", - "object_file" = "http://host:port/libmyStrConcat.so" - ); - ``` - -5. Create a custom alias function - - ``` - -- create a custom functional alias function - CREATE ALIAS FUNCTION id_masking(BIGINT) WITH PARAMETER(id) - AS CONCAT(LEFT(id, 3), '****', RIGHT(id, 4)); - - -- create a custom cast alias function - CREATE ALIAS FUNCTION string(ALL, INT) WITH PARAMETER(col, length) - AS CAST(col AS varchar(length)); - ``` -6. Create a remote UDF - ``` - CREATE FUNCTION rpc_add(INT, INT) RETURNS INT PROPERTIES ( - "SYMBOL"="add_int", - "OBJECT_FILE"="127.0.0.1:9999", - "TYPE"="RPC" - ); - ``` -## keyword -CREATE,FUNCTION diff --git a/docs/en/sql-reference/sql-statements/Data Definition/drop-function.md b/docs/en/sql-reference/sql-statements/Data Definition/drop-function.md deleted file mode 100644 index 0fbaa55131..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/drop-function.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -{ - "title": "DROP FUNCTION", - "language": "en" -} ---- - - - -# DROP FUNCTION -##Description -### Syntax - -``` -DROP FUNCTION function_name -(angry type [...]) -``` - -### Parameters - ->` function_name': To delete the name of the function -> ->` arg_type`: To delete the parameter list of the function -> - - -Delete a custom function. The name of the function and the type of the parameter are exactly the same before they can be deleted. - -## example - -1. 
Delete a function - -``` -DROP FUNCTION my_add(INT, INT) -``` -## keyword -DROP,FUNCTION diff --git a/docs/en/sql-reference/sql-statements/Data Definition/show-functions.md b/docs/en/sql-reference/sql-statements/Data Definition/show-functions.md deleted file mode 100644 index 59b5bcb1fe..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Definition/show-functions.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -{ - "title": "SHOW FUNCTIONS", - "language": "en" -} ---- - - - -# SHOW FUNCTIONS -## Description -### Syntax - -``` -SHOW [FULL] [BUILTIN] FUNCTIONS [IN|FROM db] [LIKE 'function_pattern'] -``` - -### Parameters - ->`full`: Indicate to show the details of function ->`builtin`: Indicate to show the functions that doris provides ->`db`: The name of the database to query ->`function_pattern`: The parameter to filter function name - -Look at all the custom(builtin) functions under the database. If the user specifies the database, then look at the corresponding database, otherwise directly query the database where the current session is located. - -You need `SHOW` privileges for this database - -## example - -``` -mysql> show full functions in testDb\G -*************************** 1. row *************************** - Signature: my_add(INT,INT) - Return Type: INT - Function Type: Scalar -Intermediate Type: NULL - Properties: {"symbol":"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_","object_file":"http://host:port/libudfsample.so","md5":"cfe7a362d10f3aaf6c49974ee0f1f878"} -*************************** 2. row *************************** - Signature: my_count(BIGINT) - Return Type: BIGINT - Function Type: Aggregate -Intermediate Type: NULL - Properties: {"object_file":"http://host:port/libudasample.so","finalize_fn":"_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE","init_fn":"_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE","merge_fn":"_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_","md5":"37d185f80f95569e2676da3d5b5b9d2f","update_fn":"_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE"} -*************************** 3. row *************************** - Signature: id_masking(BIGINT) - Return Type: VARCHAR - Function Type: Alias -Intermediate Type: NULL - Properties: {"parameter":"id","origin_function":"concat(left(`id`, 3), `****`, right(`id`, 4))"} - -3 rows in set (0.00 sec) -mysql> show builtin functions in testDb like 'year%'; -+---------------+ -| Function Name | -+---------------+ -| year | -| years_add | -| years_diff | -| years_sub | -+---------------+ -2 rows in set (0.00 sec) -``` - -## keyword -SHOW,FUNCTIONS diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/BEGIN.md b/docs/en/sql-reference/sql-statements/Data Manipulation/BEGIN.md deleted file mode 100644 index 069eeeb786..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/BEGIN.md +++ /dev/null @@ -1,92 +0,0 @@ ---- -{ - "title": "BEGIN", - "language": "en" -} ---- - - - -# BEGIN, COMMIT, ROLLBACK -## Description -### Syntax - -``` -BEGIN; -INSERT INTO table_name ... -COMMIT; -``` -``` -BEGIN [ WITH LABEL label]; -INSERT INTO table_name ... -ROLLBACK; -``` -### Parameters - -> label: the label for this transaction, if you need to set it to a string. - -### Note - -A transaction can only be used on insert, nor update or delete. You can check the state of this transaction by `SHOW TRANSACTION WHERE LABEL = 'label'` - -## example - -1. 
Begin a transaction without a label, then commit it - -``` -BEGIN -INSERT INTO test VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, DEFAULT); -INSERT INTO test (c1) VALUES (1); -COMMIT: -``` - -All the data in the sql between `begin` and `commit` will be inserted into the table. - -2. Begin a transaction without a label, then abort it - -``` -BEGIN -INSERT INTO test VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, DEFAULT); -INSERT INTO test (c1) VALUES (1); -ROLLBACK: -``` - -All the data in the sql between `begin` and `rollback` will be aborted, nothing will be inserted into the table. - -3. Begin a transaction with a label, then commit it - -``` -BEGIN WITH LABEL test_label1 -INSERT INTO test VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, DEFAULT); -INSERT INTO test (c1) VALUES (1); -COMMIT: -``` - -All the data in the sql between `begin` and `commit` will be inserted into the table. -The label of `test_label1` will be set to mark this transaction. You can check this transaction by `SHOW TRANSACTION WHERE LABEL = 'test_label1'`. - -## keyword -BEGIN, COMMIT, ROLLBACK diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md deleted file mode 100644 index 312901e324..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md +++ /dev/null @@ -1,587 +0,0 @@ ---- -{ - "title": "BROKER LOAD", - "language": "en" -} ---- - - - -# BROKER LOAD -## description - - Broker load will load data into Doris via Broker. - Use `show broker;` to see the Broker deployed in cluster. - - Support following data sources: - - 1. Baidu HDFS: hdfs for Baidu. Only be used inside Baidu. - 2. Baidu AFS: afs for Baidu. Only be used inside Baidu. - 3. Baidu Object Storage(BOS): BOS on Baidu Cloud. - 4. Apache HDFS. - 5. Amazon S3: Amazon S3. - -### Syntax: - - LOAD LABEL load_label - ( - data_desc1[, data_desc2, ...] - ) - WITH [BROKER broker_name | S3] - [load_properties] - [opt_properties]; - - 1. load_label - - Unique load label within a database. - syntax: - [database_name.]your_label - - 2. data_desc - - To describe the data source. - syntax: - [MERGE|APPEND|DELETE] - DATA INFILE - ( - "file_path1"[, file_path2, ...] - ) - [NEGATIVE] - INTO TABLE `table_name` - [PARTITION (p1, p2)] - [COLUMNS TERMINATED BY "column_separator"] - [FORMAT AS "file_type"] - [(column_list)] - [SET (k1 = func(k2))] - [PRECEDING FILTER predicate] - [WHERE predicate] - [DELETE ON label=true] - [read_properties] - - Explain: - file_path: - - File path. Support wildcard. Must match to file, not directory. - - PARTITION: - - Data will only be loaded to specified partitions. Data out of partition's range will be filtered. If not specifed, all partitions will be loaded. - - NEGATIVE: - - If this parameter is specified, it is equivalent to importing a batch of "negative" data to offset the same batch of data loaded before. - - This parameter applies only to the case where there are value columns and the aggregation type of value columns is only SUM. - - column_separator: - - Used to specify the column separator in the import file. Default is `\t`. - If the character is invisible, it needs to be prefixed with `\\x`, using hexadecimal to represent the separator. 
- - For example, the separator `\x01` of the hive file is specified as `\\ x01` - - file_type: - - Used to specify the type of imported file, such as parquet, orc, csv. Default values are determined by the file suffix name. - - column_list: - - Used to specify the correspondence between columns in the import file and columns in the table. - - When you need to skip a column in the import file, specify it as a column name that does not exist in the table. - - syntax: - (col_name1, col_name2, ...) - - SET: - - If this parameter is specified, a column of the source file can be transformed according to a function, and then the transformed result can be loaded into the table. The grammar is `column_name = expression`. Some examples are given to help understand. - - Example 1: There are three columns "c1, c2, c3" in the table. The first two columns in the source file correspond in turn (c1, c2), and the last two columns correspond to c3. Then, column (c1, c2, tmp_c3, tmp_c4) SET (c3 = tmp_c3 + tmp_c4) should be specified. - - Example 2: There are three columns "year, month, day" in the table. There is only one time column in the source file, in the format of "2018-06-01:02:03". Then you can specify columns (tmp_time) set (year = year (tmp_time), month = month (tmp_time), day = day (tmp_time)) to complete the import. - - PRECEDING FILTER predicate: - - Used to filter original data. The original data is the data without column mapping and transformation. The user can filter the data before conversion, select the desired data, and then perform the conversion. - - WHERE: - - After filtering the transformed data, data that meets where predicates can be loaded. Only column names in tables can be referenced in WHERE statements. - - merge_type: - - The type of data merging supports three types: APPEND, DELETE, and MERGE. APPEND is the default value, which means that all this batch of data needs to be appended to the existing data. DELETE means to delete all rows with the same key as this batch of data. MERGE semantics Need to be used in conjunction with the delete condition, which means that the data that meets the delete on condition is processed according to DELETE semantics and the rest is processed according to APPEND semantics - - delete_on_predicates: - - Only used when merge type is MERGE - - read_properties: - - Used to specify some special parameters. - Syntax: - [PROPERTIES ("key"="value", ...)] - - You can specify the following parameters: - - line_delimiter: Used to specify the line delimiter in the load file. The default is `\n`. You can use a combination of multiple characters as the column separator. - - fuzzy_parse: Boolean type, true to indicate that parse json schema as the first line, this can make import more faster,but need all key keep the order of first line, default value is false. Only use for json format. - - jsonpaths: There are two ways to import json: simple mode and matched mode. - simple mode: it is simple mode without setting the jsonpaths parameter. In this mode, the json data is required to be the object type. For example: - {"k1": 1, "k2": 2, "k3": "hello"}, where k1, k2, k3 are column names. - - matched mode: the json data is relatively complex, and the corresponding value needs to be matched through the jsonpaths parameter. - - strip_outer_array: Boolean type, true to indicate that json data starts with an array object and flattens objects in the array object, default value is false. 
For example: - [ - {"k1" : 1, "v1" : 2}, - {"k1" : 3, "v1" : 4} - ] - if strip_outer_array is true, and two rows of data are generated when imported into Doris. - - json_root: json_root is a valid JSONPATH string that specifies the root node of the JSON Document. The default value is "". - - num_as_string: Boolean type, true means that when parsing the json data, it will be converted into a number type and converted into a string, and then it will be imported without loss of precision. - - 3. broker_name - - The name of the Broker used can be viewed through the `show broker` command. - - 4. load_properties - - Used to provide Broker access to data sources. Different brokers, and different access methods, need to provide different information. - - 4.1. Baidu HDFS/AFS - - Access to Baidu's internal hdfs/afs currently only supports simple authentication, which needs to be provided: - - username: hdfs username - password: hdfs password - - 4.2. BOS - - bos_endpoint. - bos_accesskey: cloud user's accesskey - bos_secret_accesskey: cloud user's secret_accesskey - - 4.3. Apache HDFS - - Community version of HDFS supports simple authentication, Kerberos authentication, and HA configuration. - - Simple authentication: - hadoop.security.authentication = simple (default) - username: hdfs username - password: hdfs password - - kerberos authentication: - hadoop.security.authentication = kerberos - kerberos_principal: kerberos's principal - kerberos_keytab: path of kerberos's keytab file. This file should be able to access by Broker - kerberos_keytab_content: Specify the contents of the KeyTab file in Kerberos after base64 encoding. This option is optional from the kerberos_keytab configuration. - - namenode HA: - By configuring namenode HA, new namenode can be automatically identified when the namenode is switched - dfs.nameservices: hdfs service name, customize, eg: "dfs.nameservices" = "my_ha" - dfs.ha.namenodes.xxx: Customize the name of a namenode, separated by commas. XXX is a custom name in dfs. name services, such as "dfs. ha. namenodes. my_ha" = "my_nn" - dfs.namenode.rpc-address.xxx.nn: Specify RPC address information for namenode, where NN denotes the name of the namenode configured in dfs.ha.namenodes.xxxx, such as: "dfs.namenode.rpc-address.my_ha.my_nn"= "host:port" - dfs.client.failover.proxy.provider: Specify the provider that client connects to namenode by default: org. apache. hadoop. hdfs. server. namenode. ha. Configured Failover ProxyProvider. - 4.4. Amazon S3 - - fs.s3a.access.key: AmazonS3的access key - fs.s3a.secret.key: AmazonS3的secret key - fs.s3a.endpoint: AmazonS3的endpoint - 4.5. If using the S3 protocol to directly connect to the remote storage, you need to specify the following attributes - - ( - "AWS_ENDPOINT" = "", - "AWS_ACCESS_KEY" = "", - "AWS_SECRET_KEY"="", - "AWS_REGION" = "" - ) - 4.6. 
if using load with hdfs, you need to specify the following attributes - ( - "fs.defaultFS" = "", - "hdfs_user"="", - "dfs.nameservices"="my_ha", - "dfs.ha.namenodes.xxx"="my_nn1,my_nn2", - "dfs.namenode.rpc-address.xxx.my_nn1"="host1:port", - "dfs.namenode.rpc-address.xxx.my_nn2"="host2:port", - "dfs.client.failover.proxy.provider.xxx"="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ) - fs.defaultFS: defaultFS - hdfs_user: hdfs user - namenode HA: - By configuring namenode HA, new namenode can be automatically identified when the namenode is switched - dfs.nameservices: hdfs service name, customize, eg: "dfs.nameservices" = "my_ha" - dfs.ha.namenodes.xxx: Customize the name of a namenode, separated by commas. XXX is a custom name in dfs. name services, such as "dfs. ha. namenodes. my_ha" = "my_nn" - dfs.namenode.rpc-address.xxx.nn: Specify RPC address information for namenode, where NN denotes the name of the namenode configured in dfs.ha.namenodes.xxxx, such as: "dfs.namenode.rpc-address.my_ha.my_nn"= "host:port" - dfs.client.failover.proxy.provider: Specify the provider that client connects to namenode by default: org. apache. hadoop. hdfs. server. namenode. ha. Configured Failover ProxyProvider. - - 5. opt_properties - - Used to specify some special parameters. - Syntax: - [PROPERTIES ("key"="value", ...)] - - You can specify the following parameters: - - timout: Specifies the timeout time for the import operation. The default timeout is 4 hours per second. - - max_filter_ratio: Data ratio of maximum tolerance filterable (data irregularity, etc.). Default zero tolerance. - - exc_mem_limit: Memory limit. Default is 2GB. Unit is Bytes. - - strict_mode: Whether the data is strictly restricted. The default is false. - - timezone: Specify time zones for functions affected by time zones, such as strftime/alignment_timestamp/from_unixtime, etc. See the documentation for details. If not specified, use the "Asia/Shanghai" time zone. - - send_batch_parallelism: Used to set the default parallelism for sending batch, if the value for parallelism exceed `max_send_batch_parallelism_per_job` in BE config, then the coordinator BE will use the value of `max_send_batch_parallelism_per_job`. - - load_to_single_tablet: Boolean type, True means that one task can only load data to one tablet in the corresponding partition at a time. The default value is false. The number of tasks for the job depends on the overall concurrency. This parameter can only be set when loading data into the OLAP table with random partition. - - 6. Load data format sample - - Integer(TINYINT/SMALLINT/INT/BIGINT/LARGEINT): 1, 1000, 1234 - Float(FLOAT/DOUBLE/DECIMAL): 1.1, 0.23, .356 - Date(DATE/DATETIME): 2017-10-03, 2017-06-13 12:34:03. - (Note: If it's in other date formats, you can use strftime or time_format functions to convert in the import command) - - String(CHAR/VARCHAR): "I am a student", "a" - NULL: \N - -## example - - 1. Load a batch of data from HDFS, specify timeout and filtering ratio. Use the broker with the plaintext ugi my_hdfs_broker. Simple authentication. - - LOAD LABEL example_db.label1 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - ) - WITH BROKER my_hdfs_broker - ( - "username" = "hdfs_user", - "password" = "hdfs_passwd" - ) - PROPERTIES - ( - "timeout" = "3600", - "max_filter_ratio" = "0.1" - ); - - Where hdfs_host is the host of the namenode and hdfs_port is the fs.defaultFS port (default 9000) - - 2. 
Load a batch of data from AFS contains multiple files. Import different tables, specify separators, and specify column correspondences. - - LOAD LABEL example_db.label2 - ( - DATA INFILE("afs://afs_host:hdfs_port/user/palo/data/input/file1") - INTO TABLE `my_table_1` - COLUMNS TERMINATED BY "," - (k1, k3, k2, v1, v2), - DATA INFILE("afs://afs_host:hdfs_port/user/palo/data/input/file2") - INTO TABLE `my_table_2` - COLUMNS TERMINATED BY "\t" - (k1, k2, k3, v2, v1) - ) - WITH BROKER my_afs_broker - ( - "username" = "afs_user", - "password" = "afs_passwd" - ) - PROPERTIES - ( - "timeout" = "3600", - "max_filter_ratio" = "0.1" - ); - - - 3. Load a batch of data from HDFS, specify hive's default delimiter \\x01, and use wildcard * to specify all files in the directory. Use simple authentication and configure namenode HA at the same time - - LOAD LABEL example_db.label3 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*") - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\\x01" - ) - WITH BROKER my_hdfs_broker - ( - "username" = "hdfs_user", - "password" = "hdfs_passwd", - "dfs.nameservices" = "my_ha", - "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2", - "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port", - "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port", - "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ) - - 4. Load a batch of "negative" data from HDFS. Use Kerberos authentication to provide KeyTab file path. - - LOAD LABEL example_db.label4 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file) - NEGATIVE - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\t" - ) - WITH BROKER my_hdfs_broker - ( - "hadoop.security.authentication" = "kerberos", - "kerberos_principal"="doris@YOUR.COM", - "kerberos_keytab"="/home/palo/palo.keytab" - ) - - 5. Load a batch of data from HDFS, specify partition. At the same time, use Kerberos authentication mode. Provide the KeyTab file content encoded by base64. - - LOAD LABEL example_db.label5 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, k3, k2, v1, v2) - ) - WITH BROKER my_hdfs_broker - ( - "hadoop.security.authentication"="kerberos", - "kerberos_principal"="doris@YOUR.COM", - "kerberos_keytab_content"="BQIAAABEAAEACUJBSURVLkNPTQAEcGFsbw" - ) - - 6. Load a batch of data from BOS, specify partitions, and make some transformations to the columns of the imported files, as follows: - - Table schema: - k1 varchar(20) - k2 int - - Assuming that the data file has only one row of data: - - Adele,1,1 - - The columns in the data file correspond to the columns specified in the load statement: - - k1,tmp_k2,tmp_k3 - - transform as: - - 1) k1: unchanged - 2) k2: sum of tmp_k2 and tmp_k3 - - LOAD LABEL example_db.label6 - ( - DATA INFILE("bos://my_bucket/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, tmp_k2, tmp_k3) - SET ( - k2 = tmp_k2 + tmp_k3 - ) - ) - WITH BROKER my_bos_broker - ( - "bos_endpoint" = "http://bj.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy" - ) - - 7. Load data into tables containing HLL columns, which can be columns in tables or columns in data - - If there are 4 columns in the table are (id, v1, v2, v3). The v1 and v2 columns are hll columns. 
The imported source file has 3 columns, where the first column in the table = the first column in the source file, and the second and third columns in the table are the second and third columns in the source file, and the third column in the table is transformed. The four columns do not exist in the source file. - Then (column_list) declares that the first column is id, and the second and third columns are temporarily named k1, k2. - - In SET, the HLL column in the table must be specifically declared hll_hash. The V1 column in the table is equal to the hll_hash (k1) column in the original data.The v3 column in the table does not have a corresponding value in the original data, and empty_hll is used to supplement the default value. - - LOAD LABEL example_db.label7 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (id, k1, k2) - SET ( - v1 = hll_hash(k1), - v2 = hll_hash(k2), - v3 = empty_hll() - ) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - LOAD LABEL example_db.label8 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, k2, tmp_k3, tmp_k4, v1, v2) - SET ( - v1 = hll_hash(tmp_k3), - v2 = hll_hash(tmp_k4) - ) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - 8. Data in load Parquet file specifies FORMAT as parquet. By default, it is judged by file suffix. - - LOAD LABEL example_db.label9 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - FORMAT AS "parquet" - (k1, k2, k3) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - 9. Extract partition fields in file paths - - If necessary, partitioned fields in the file path are resolved based on the field type defined in the table, similar to the Partition Discovery function in Spark. - - LOAD LABEL example_db.label10 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/*/*") - INTO TABLE `my_table` - FORMAT AS "csv" - (k1, k2, k3) - COLUMNS FROM PATH AS (city, utc_date) - SET (uniq_id = md5sum(k1, city)) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - Directory `hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing` contains following files: - - [hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/utc_date=2019-06-26/0000.csv, hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/utc_date=2019-06-26/0001.csv, ...] - - Extract city and utc_date fields in the file path - - 10. To filter the load data, columns whose K1 value is greater than K2 value can be imported. - - LOAD LABEL example_db.label10 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - where k1 > k2 - ); - - 11. 
Extract date partition fields in file paths, and date time include %3A (in hdfs path, all ':' will be replaced by '%3A') - - Assume we have files: - - /user/data/data_time=2020-02-17 00%3A00%3A00/test.txt - /user/data/data_time=2020-02-18 00%3A00%3A00/test.txt - - Table schema is: - data_time DATETIME, - k2 INT, - k3 INT - - LOAD LABEL example_db.label12 - ( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl12` - COLUMNS TERMINATED BY "," - (k2,k3) - COLUMNS FROM PATH AS (data_time) - SET (data_time=str_to_date(data_time, '%Y-%m-%d %H%%3A%i%%3A%s')) - ) - WITH BROKER "hdfs" ("username"="user", "password"="pass"); - - 12. Load a batch of data from HDFS, specify timeout and filtering ratio. Use the broker with the plaintext ugi my_hdfs_broker. Simple authentication. delete the data when v2 >100, other append - - LOAD LABEL example_db.label1 - ( - MERGE DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\t" - (k1, k2, k3, v2, v1) - ) - DELETE ON v2 >100 - WITH BROKER my_hdfs_broker - ( - "username" = "hdfs_user", - "password" = "hdfs_passwd" - ) - PROPERTIES - ( - "timeout" = "3600", - "max_filter_ratio" = "0.1" - ); - - 13. Filter the original data first, and perform column mapping, conversion and filtering operations - - LOAD LABEL example_db.label_filter - ( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl1` - COLUMNS TERMINATED BY "," - (k1,k2,v1,v2) - SET (k1 = k1 +1) - PRECEDING FILTER k1 > 2 - WHERE k1 > 3 - ) - with BROKER "hdfs" ("username"="user", "password"="pass"); - - 14. Import the data in the json file, and specify format as json, it is judged by the file suffix by default, set parameters for reading data - - LOAD LABEL example_db.label9 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - FORMAT AS "json" - (k1, k2, k3) - properties("fuzzy_parse"="true", "strip_outer_array"="true") - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - 15. LOAD WITH HDFS, normal HDFS cluster - LOAD LABEL example_db.label_filter - ( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl1` - COLUMNS TERMINATED BY "," - (k1,k2,v1,v2) - ) - with HDFS ( - "fs.defaultFS"="hdfs://testFs", - "hdfs_user"="user" - ); - 16. LOAD WITH HDFS, hdfs ha - LOAD LABEL example_db.label_filter - ( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl1` - COLUMNS TERMINATED BY "," - (k1,k2,v1,v2) - ) - with HDFS ( - "fs.defaultFS"="hdfs://testFs", - "hdfs_user"="user", - "dfs.nameservices"="my_ha", - "dfs.ha.namenodes.xxx"="my_nn1,my_nn2", - "dfs.namenode.rpc-address.xxx.my_nn1"="host1:port", - "dfs.namenode.rpc-address.xxx.my_nn2"="host2:port", - "dfs.client.failover.proxy.provider.xxx"="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ); - -## keyword - - BROKER,LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md deleted file mode 100644 index 0ab4c63888..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL DELETE.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -{ - "title": "CANCEL DELETE", - "language": "en" -} ---- - - - -# CANCEL DELETE -Description - -This statement is used to undo a DELETE operation. (Administrator only!) 
(Not yet implemented.) - -## example - -## keyword -CANCEL,DELETE - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md b/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md deleted file mode 100644 index 990f3b4331..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LABEL.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -{ - "title": "Cancel Label", - "language": "en" -} ---- - - - -# Cancel Label -Description -NAME: -cancel_label: cancel a transaction with the specified label - -SYNOPSIS -curl -u user:passwd -XPOST http://host:port/api/{db}/{label}/_cancel - -DESCRIPTION -This command is used to cancel the transaction corresponding to a specified Label; it can only be cancelled successfully during the Prepare phase. - -RETURN VALUES -When the execution is complete, the relevant content of this import is returned in JSON format. It currently includes the following fields: -Status: whether the cancellation succeeded -Success: the transaction was cancelled successfully -Others: the cancellation failed -Message: specific failure information - -ERRORS - -## example - -1. Cancel the job with label testLabel on database testDb -curl -u root -XPOST http://host:port/api/testDb/testLabel/_cancel - -## keyword -Cancel, Label diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md deleted file mode 100644 index 3a122d4bcb..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "CANCEL LOAD", - "language": "en" -} ---- - - - -# CANCEL LOAD -Description - -This statement is used to cancel the import job with the specified load label. -This is an asynchronous operation, which returns once the task is submitted successfully. After execution, you can use the SHOW LOAD command to view progress. -Grammar: -CANCEL LOAD -[FROM db_name] -WHERE LABEL = "load_label"; - -## example - -1. Cancel the import job labeled example_db_test_load_label on the database example_db -CANCEL LOAD -FROM example_db -WHERE LABEL = "example_db_test_load_label"; - -## keyword -CANCEL,LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md deleted file mode 100644 index 9a46486c87..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md +++ /dev/null @@ -1,165 +0,0 @@ ---- -{ - "title": "CREATE SYNC JOB", - "language": "en" -} ---- - - - -# CREATE SYNC JOB - -## description - -The sync job feature submits a resident SyncJob that performs CDC (change data capture) on the user's update operations in a MySQL database by reading the binlog from the specified remote address. - -At present, data synchronization only supports connecting to Canal, getting the parsed binlog from the Canal server and loading it into Doris. - -You can view the SyncJob's status with the command 'SHOW SYNC JOB'. - -Syntax: - -``` -CREATE SYNC [db.]job_name - ( - channel_desc, - channel_desc - ... - ) -binlog_desc -``` - -1. `job_name` - - job_name is the unique identifier of the SyncJob in the current database. With a specified job name, only one SyncJob can be running at the same time. - -2. `channel_desc` - - The data channel under the job is used to describe the mapping relationship between the MySQL source table and the Doris target table.
- - Syntax: - - ``` - FROM mysql_db.src_tbl INTO des_tbl - [partitions] - [columns_mapping] - ``` - - 1. `mysql_db.src_tbl` - - Specify the database and source table on the MySQL side. - - 2. `des_tbl` - - Specify the target table on the Doris side. Only the unique table is supported, and the batch delete feature of the table needs to be enabled. - - 3. `partitions` - - Specify which partitions to be load into in target table. If it is not specified, it will be automatically loaded into the corresponding partition. - - Example: - - ``` - PARTITION(p1, p2, p3) - ``` - - 4. `column_mapping` - - Specify the mapping relationship between the columns of the MySQL source table and the Doris target table. If not specified, Fe will default that the columns of the source table and the target table correspond one by one in order. - - Columns are not supported in the 'col_name = expr' form. - - Example: - - ``` - Suppose the columns of target table are (K1, K2, V1), - - Change the order of columns K1 and K2 - COLUMNS(k2, k1, v1) - - Ignore the fourth column of the source data - COLUMNS(k2, k1, v1, dummy_column) - ``` - -3. `binlog_desc` - - It is used to describe remote data sources. Currently, only canal is supported. - - Syntax: - - ``` - FROM BINLOG - ( - "key1" = "value1", - "key2" = "value2" - ) - ``` - - 1. The attribute related to the canal is prefixed with `canal.` - - 1. canal.server.ip: the address of the canal server - 2. canal.server.port: the port of canal server - 3. canal.destination: Identifier of instance - 4. canal.batchSize: the maximum batch size. The default is 8192 - 5. canal.username: the username of instance - 6. canal.password: password of instance - 7. canal.debug: optional. When set to true, the details of each batch and each row will be printed. - -## example - -1. create a sync job named `job1` for target table `test_tbl` in `test_db`, connects to the local canal server, and corresponds to the MySQL source table `mysql_db1.tbl1` - - CREATE SYNC `test_db`.`job1` - ( - FROM `mysql_db1`.`tbl1` INTO `test_tbl ` - ) - FROM BINLOG - ( - "type" = "canal", - "canal.server.ip" = "127.0.0.1", - "canal.server.port" = "11111", - "canal.destination" = "example", - "canal.username" = "", - "canal.password" = "" - ); - -2. create a sync job named `job1` for multiple target tables in `test_db`, correspond to multiple MySQL source tables one by one, and explicitly specify column mapping. - - CREATE SYNC `test_db`.`job1` - ( - FROM `mysql_db`.`t1` INTO `test1` COLUMNS(k1, k2, v1) PARTITIONS (p1, p2), - FROM `mysql_db`.`t2` INTO `test2` COLUMNS(k3, k4, v2) PARTITION p1 - ) - FROM BINLOG - ( - "type" = "canal", - "canal.server.ip" = "xx.xxx.xxx.xx", - "canal.server.port" = "12111", - "canal.destination" = "example", - "canal.username" = "username", - "canal.password" = "password" - ); - -## keyword - - CREATE,SYNC,JOB,BINLOG - - - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md deleted file mode 100644 index be4c606cb3..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/DELETE.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -{ - "title": "DELETE", - "language": "en" -} ---- - - - -# DELETE -## Description - - This statement is used to conditionally delete data in the specified table (base index) partition. - This action deletes the rollup index data associated with this base index at the same time. 
- Grammar: - DELETE FROM table_name [PARTITION partition_name] - WHERE - column_name1 op { value | value_list } [ AND column_name2 op { value | value_list } ...]; - - Explain: - 1) Optional types of OP include: =, >, <, >=, <=, <=, <=, !=, in, not in - 2) Conditions on key columns can only be specified. - 2) When the selected key column does not exist in a rollup, delete cannot be performed. - 3) The relationship between conditions can only be "and". - If you want to achieve the "or" relationship, you need to divide the conditions into two DELETE statements. - 4) If it is a partitioned table, you can specify the partition. If not specified, and the session variable delete_without_partition is true, it will be applied to all partitions. If it is a single partition table, you do not need to specify it. - - Notice: - This statement may reduce query efficiency for a period of time after execution. - The degree of impact depends on the number of deletion conditions specified in the statement. - The more conditions specified, the greater the impact. - -## example - - 1. Delete rows whose K1 column value is 3 in my_table partition p 1 - DELETE FROM my_table PARTITION p1 - WHERE k1 = 3; - - 2. Delete rows whose K1 column value is greater than or equal to 3 and whose K2 column value is "abc" in my_table partition P1 - DELETE FROM my_table PARTITION p1 - WHERE k1 >= 3 AND k2 = "abc"; - - 2. Delete rows whose K1 column value is greater than or equal to 3 and whose K2 column value is "abc" in my_table partition P1,P2 - DELETE FROM my_table PARTITIONS (p1, p2) - WHERE k1 >= 3 AND k2 = "abc"; - -## keyword - DELETE - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md b/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md deleted file mode 100644 index b1646c7972..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md +++ /dev/null @@ -1,125 +0,0 @@ ---- -{ - "title": "EXPORT", - "language": "en" -} ---- - - - -# EXPORT -## Description - - This statement is used to export data from a specified table to a specified location. - This function is implemented by broker process. For different purpose storage systems, different brokers need to be deployed. Deployed brokers can be viewed through SHOW BROKER. - This is an asynchronous operation, which returns if the task is submitted successfully. After execution, you can use the SHOW EXPORT command to view progress. - - Grammar: - EXPORT TABLE table_name - [PARTITION (p1 [,p2]] - [WHERE [expr]] - TO export_path - [opt_properties] - [broker|S3]; - - 1. table_name - The table names to be exported currently support the export of tables with engine as OLAP and mysql. - - 2. partition - You can export only certain specified partitions of the specified table - - 3. expr - Export rows that meet the where condition, optional. If you leave it blank, all rows are exported by default. - - 4. export_path - The exported path needs to be a directory. At present, it can't be exported to local, so it needs to be exported to broker. - - 5. opt_properties - Used to specify some special parameters. - Grammar: - [PROPERTIES ("key"="value", ...)] - - The following parameters can be specified: - label: The identifier of this export job. You can use this identifier to view the job status later. - column_separator: Specifies the exported column separator, defaulting to t. Supports invisible characters, such as'\x07'. - column: Specify the columns to be exported, separated by commas. 
If you do not fill in this parameter, the default is to export all the columns of the table. - line_delimiter: Specifies the exported line separator, defaulting to\n. Supports invisible characters, such as'\x07'. - exec_mem_limit: Exports the upper limit of memory usage for a single BE node, defaulting to 2GB in bytes. - timeout: The time-out for importing jobs is 1 day by default, in seconds. - tablet_num_per_task: The maximum number of tablets that each subtask can allocate. - - 6. broker|S3 - Specify to use broker export or export through S3 protocol - Grammar: - WITH [BROKER broker_name| S3] ("key"="value"[,...]) - Here you need to specify the specific broker name and the required broker attributes, If you use the S3 protocol, you do not need to specify the broker name - - For brokers corresponding to different storage systems, the input parameters are different. Specific parameters can be referred to: `help broker load', broker required properties. - When exporting to local, you do not need to fill in this part. - - 7. hdfs - Specify to use libhdfs export to hdfs - Grammar: - WITH HDFS ("key"="value"[,...]) - - The following parameters can be specified: - fs.defaultFS: Set the fs such as:hdfs://ip:port - hdfs_user:Specify hdfs user name - -## example - - 1. Export all data from the testTbl table to HDFS - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - - 2. Export partitions P1 and P2 from the testTbl table to HDFS - EXPORT TABLE testTbl PARTITION (p1,p2) TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - - 3. Export all data in the testTbl table to hdfs, using "," as column separator, and specify label - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("label" = "mylabel", "column_separator"=",") WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - - 4. Export the row meet condition k1 = 1 in the testTbl table to hdfs. - EXPORT TABLE testTbl WHERE k1=1 TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - - 5. Export all data in the testTbl table to the local. - EXPORT TABLE testTbl TO "file:///home/data/a"; - - 6. Export all data in the testTbl table to hdfs, using the invisible character "\x07" as the column and row separator. - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("column_separator"="\\x07", "line_delimiter" = "\\x07") WITH BROKER "broker_name" ("username"="xxx", "password"="yyy") - - 7. Export column k1, v1 from the testTbl to the local. - EXPORT TABLE testTbl TO "file:///home/data/a" PROPERTIES ("columns" = "k1,v1"); - - 8. Export all data in the testTbl table to hdfs, using the invisible character "\x07" as the column and row separator. - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("column_separator"="\\x07", "line_delimiter" = "\\x07") WITH HDFS ("fs.defaultFS"="hdfs://hdfs_host:port", "hdfs_user"="yyy") - - 9. Export all data in the testTbl table to the local , the first line represents the field name - EXPORT TABLE testTbl TO "file:///home/data/a" PROPERTIES ("label" = "mylabel", "format"="csv_with_names"); - - 10. Export all data in the testTbl table to the local, the first two lines represent the field name and type - EXPORT TABLE testTbl TO "file:///home/data/a" PROPERTIES ("label" = "mylabel", "format"="csv_with_names_and_types"); - - 11. 
Export all data in the testTbl table to the hdfs , the first line represents the field name - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("label" = "mylabel", "format"="csv_with_names") WITH BROKER "broker_name" ("username"="myname", "password"="mypassword"); - - 12. Export all data in the testTbl table to the hdfs, the first two lines represent the field name and type - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("label" = "mylabel", "format"="csv_with_names_and_types") WITH BROKER "broker_name" ("username"="myname", "password"="mypassword"); - -## keyword - EXPORT diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md deleted file mode 100644 index 21b46c73b1..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/GET LABEL STATE.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -{ - "title": "GET LABEL STATE", - "language": "en" -} ---- - - - -# GET LABEL STATE -## Description -NAME: -get_label_state: get label's state - -SYNOPSIS -curl -u user:passwd http://host:port /api /{db}/{label}// u state - -DESCRIPTION -This command is used to view the transaction status of a Label - -RETURN VALUES -After execution, the relevant content of this import will be returned in Json format. Currently includes the following fields -Label: The imported label, if not specified, is a uuid. -Status: Whether this command was successfully executed or not, Success indicates successful execution -Message: Specific execution information -State: It only makes sense if Status is Success -UNKNOWN: No corresponding Label was found -PREPARE: The corresponding transaction has been prepared, but not yet committed -COMMITTED: The transaction has been committed and cannot be canceled -VISIBLE: Transaction submission, and data visible, cannot be canceled -ABORTED: The transaction has been ROLLBACK and the import has failed. - -ERRORS - -'35;'35; example - -1. Obtain the state of testDb, testLabel -curl -u root http://host:port /api /testDb /testLabel / u state - -## keyword -GET, LABEL, STATE diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/GROUP BY.md b/docs/en/sql-reference/sql-statements/Data Manipulation/GROUP BY.md deleted file mode 100644 index 8fcb20e568..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/GROUP BY.md +++ /dev/null @@ -1,168 +0,0 @@ ---- -{ - "title": "GROUP BY", - "language": "en" -} ---- - - - -# GROUP BY - -## description - - GROUP BY `GROUPING SETS` | `CUBE` | `ROLLUP` is an extension to GROUP BY clause. This syntax lets you define multiple groupings in the same query. GROUPING SETS produce a single result set that is equivalent to a UNION ALL of differently grouped rows - For example GROUPING SETS clause: - - ``` - SELECT a, b, SUM( c ) FROM tab1 GROUP BY GROUPING SETS ( (a, b), (a), (b), ( ) ); - ``` - - This statement is equivalent to: - - ``` - SELECT a, b, SUM( c ) FROM tab1 GROUP BY a, b - UNION - SELECT a, null, SUM( c ) FROM tab1 GROUP BY a - UNION - SELECT null, b, SUM( c ) FROM tab1 GROUP BY b - UNION - SELECT null, null, SUM( c ) FROM tab1 - ``` - - `GROUPING(expr)` indicates whether a specified column expression in a GROUP BY list is aggregated or not. GROUPING returns 1 for aggregated or 0 for not aggregated in the result set. - - `GROUPING_ID(expr [ , expr [ , ... ] ])` describes which of a list of expressions are grouped in a row produced by a GROUP BY query. 
The GROUPING_ID function simply returns the decimal equivalent of the binary value formed as a result of the concatenation of the values returned by the GROUPING functions. - -### Syntax - - ``` - SELECT ... - FROM ... - [ ... ] - GROUP BY [ - , ... | - GROUPING SETS [, ...] ( groupSet [ , groupSet [ , ... ] ] ) | - ROLLUP(expr [ , expr [ , ... ] ]) | - expr [ , expr [ , ... ] ] WITH ROLLUP | - CUBE(expr [ , expr [ , ... ] ]) | - expr [ , expr [ , ... ] ] WITH CUBE - ] - [ ... ] - ``` - -### Parameters - - `groupSet` is a set of expression or column or it's alias appearing in the query block’s SELECT list. `groupSet ::= { ( expr [ , expr [ , ... ] ] )}` - - `expr` is expression or column or it's alias appearing in the query block’s SELECT list. - -### Note - - doris supports PostgreSQL like syntax, for example: - - ``` - SELECT a, b, SUM( c ) FROM tab1 GROUP BY GROUPING SETS ( (a, b), (a), (b), ( ) ); - SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY ROLLUP(a,b,c) - SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY CUBE(a,b,c) - ``` - - `ROLLUP(a,b,c)` is equivalent to `GROUPING SETS` as follows: - - ``` - GROUPING SETS ( - (a,b,c), - ( a, b ), - ( a), - ( ) - ) - ``` - - `CUBE ( a, b, c )` is equivalent to `GROUPING SETS` as follows: - - ``` - GROUPING SETS ( - ( a, b, c ), - ( a, b ), - ( a, c ), - ( a ), - ( b, c ), - ( b ), - ( c ), - ( ) - ) - ``` - -## example - - This is a simple example - - ``` - > SELECT * FROM t; - +------+------+------+ - | k1 | k2 | k3 | - +------+------+------+ - | a | A | 1 | - | a | A | 2 | - | a | B | 1 | - | a | B | 3 | - | b | A | 1 | - | b | A | 4 | - | b | B | 1 | - | b | B | 5 | - +------+------+------+ - 8 rows in set (0.01 sec) - - > SELECT k1, k2, SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ); - +------+------+-----------+ - | k1 | k2 | sum(`k3`) | - +------+------+-----------+ - | b | B | 6 | - | a | B | 4 | - | a | A | 3 | - | b | A | 5 | - | NULL | B | 10 | - | NULL | A | 8 | - | a | NULL | 7 | - | b | NULL | 11 | - | NULL | NULL | 18 | - +------+------+-----------+ - 9 rows in set (0.06 sec) - - > SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ((k1, k2), (k1), (k2), ()); - +------+------+---------------+----------------+ - | k1 | k2 | grouping_id(k1,k2) | sum(`k3`) | - +------+------+---------------+----------------+ - | a | A | 0 | 3 | - | a | B | 0 | 4 | - | a | NULL | 1 | 7 | - | b | A | 0 | 5 | - | b | B | 0 | 6 | - | b | NULL | 1 | 11 | - | NULL | A | 2 | 8 | - | NULL | B | 2 | 10 | - | NULL | NULL | 3 | 18 | - +------+------+---------------+----------------+ - 9 rows in set (0.02 sec) - ``` - -## keyword - - GROUP, GROUPING, GROUPING_ID, GROUPING_SETS, GROUPING SETS, CUBE, ROLLUP diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/LOAD.md deleted file mode 100644 index 114af8331e..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/LOAD.md +++ /dev/null @@ -1,291 +0,0 @@ ---- -{ - "title": "LOAD", - "language": "en" -} ---- - - - -# LOAD -## Description - -Palo currently supports the following four import methods: - -1. Hadoop Load: Importing ETL based on MR. -2. Broker Load: Use broker to import data. -3. Mini Load: Upload files through HTTP protocol for batch data import. -4. Stream Load: Stream data import through HTTP protocol. -5. S3 Load: Directly access the storage system supporting the S3 protocol for data import through the S3 protocol. 
The import syntax is basically the same as that of Broker Load. - -This help mainly describes the first import method, namely Hadoop Load related help information. The rest of the import methods can use the following commands to view help: - -This import method may not be supported in a subsequent version. It is recommended that other import methods be used for data import. !!! - -1. help broker load; -2. help mini load; -3. help stream load; - -Hadoop Load is only applicable to Baidu's internal environment. Public, private and open source environments cannot use this import approach. -The import method must set up a Hadoop computing queue for ETL, which can be viewed through the help set property command. - -Grammar: - -LOAD LABEL load_label -( -Date of date of date of entry -) -[opt_properties]; - -1. load label - -The label of the current imported batch. Unique in a database. -Grammar: -[database_name.]your_label - -2. data_desc - -Used to describe a batch of imported data. -Grammar: -DATA INFILE -( -"file_path1"[, file_path2, ...] -) -[NEGATIVE] -INTO TABLE `table_name` -[PARTITION (p1, P2)] -[COLUMNS TERMINATED BY "column_separator"] -[FORMAT AS "file_type"] -[(column_list)] -[set (k1 = fun (k2)]] - -Explain: -file_path: - -File paths can be specified to a file, or * wildcards can be used to specify all files in a directory. Wildcards must match to files, not directories. - -PARTICIPATION: - -If this parameter is specified, only the specified partition will be imported, and data outside the imported partition will be filtered out. -If not specified, all partitions of the table are imported by default. - -NEGATIVE: -If this parameter is specified, it is equivalent to importing a batch of "negative" data. Used to offset the same batch of data imported before. -This parameter applies only to the case where there are value columns and the aggregation type of value columns is SUM only. - -Column U separator: - -Used to specify the column separator in the import file. Default tot -If the character is invisible, it needs to be prefixed with \x, using hexadecimal to represent the separator. -For example, the separator X01 of the hive file is specified as "\ x01" - -File type: - -Used to specify the type of imported file, such as parquet, orc, csv. The default value is determined by the file suffix name. - -column_list: - -Used to specify the correspondence between columns in the import file and columns in the table. -When you need to skip a column in the import file, specify it as a column name that does not exist in the table. -Grammar: -(col_name1, col_name2, ...) - -SET: - -If this parameter is specified, a column of the source file can be transformed according to a function, and then the transformed result can be imported into the table. -The functions currently supported are: - -Strftime (fmt, column) date conversion function -Fmt: Date format, such as% Y% m% d% H% M% S (year, month, day, hour, second) -Column: Column in column_list, which is the column in the input file. Storage content should be a digital timestamp. -If there is no column_list, the columns of the input file are entered by default in the column order of the Palo table. - -time_format(output_fmt, input_fmt, column) 日期格式转化 -Output_fmt: Converted date format, such as% Y% m% d% H% M% S (year, month, day, hour, second) -Input_fmt: The date format of the column before transformation, such as% Y% m% d% H% M% S (days, hours, seconds, months, years) -Column: Column in column_list, which is the column in the input file. 
Storage content should be a date string in input_fmt format. -If there is no column_list, the columns of the input file are entered by default in the column order of the Palo table. - -alignment_timestamp(precision, column) 将时间戳对齐到指定精度 -Precision: year 124month;124day;124hour; -Column: Column in column_list, which is the column in the input file. Storage content should be a digital timestamp. -If there is no column_list, the columns of the input file are entered by default in the column order of the Palo table. -Note: When the alignment accuracy is year and month, only the time stamps in the range of 20050101-20191231 are supported. - -Default_value (value) sets the default value for a column import -Use default values of columns when creating tables without specifying - -Md5sum (column1, column2,...) evaluates the value of the specified imported column to md5sum, returning a 32-bit hexadecimal string - -Replace_value (old_value [, new_value]) replaces old_value specified in the import file with new_value -New_value, if not specified, uses the default value of the column when building the table - -Hll_hash (column) is used to transform a column in a table or data into a data structure of a HLL column - -3. opt_properties - -Used to specify some special parameters. -Grammar: -[PROPERTIES ("key"="value", ...)] - -The following parameters can be specified: -Cluster: Import the Hadoop computed queue used. -Timeout: Specifies the timeout time of the import operation. The default timeout is 3 days. Unit seconds. -Max_filter_ratio: The ratio of data that is most tolerant of being filterable (for reasons such as data irregularities). Default zero tolerance. -Load_delete_flag: Specifies whether the import deletes data by importing the key column, which applies only to UNIQUE KEY. -Value column is not specified when importing. The default is false. - -5. Import data format sample - -Integer classes (TINYINT/SMALLINT/INT/BIGINT/LARGEINT): 1,1000,1234 -Floating Point Class (FLOAT/DOUBLE/DECIMAL): 1.1, 0.23, 356 -Date class (DATE/DATETIME): 2017-10-03, 2017-06-13 12:34:03. -(Note: If it's in other date formats, you can use strftime or time_format functions to convert in the import command) -字符串类(CHAR/VARCHAR): "I am a student", "a" -NULL value: N - -6. S3 Storage - fs.s3a.access.key user AK,required - fs.s3a.secret.key user SK,required - fs.s3a.endpoint user endpoint,required - fs.s3a.impl.disable.cache whether disable cache,default true,optional - -'35;'35; example - -1. Import a batch of data, specify timeout time and filtering ratio. Specify the import queue as my_cluster. - -LOAD LABEL example db.label1 -( -DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") -INTO TABLE `my_table` -) -PROPERTIES -( -"cluster" ="my" cluster, -Timeout ="3600", -"max_filter_ratio" = "0.1" -); - -Where hdfs_host is the host of the namenode and hdfs_port is the fs.defaultFS port (default 9000) - -2. Import a batch of data, including multiple files. Import different tables, specify separators, and specify column correspondences - -LOAD LABEL example db.label2 -( -DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file1") -INTO TABLE `my_table_1` -COLUMNS TERMINATED BY "," -(k1, k3, k2, v1, v2), -DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file2") -INTO TABLE `my_table_2` -COLUMNS TERMINATED BY "\t" -(k1, k2, k3, v2, v1) -); - -3. 
Import a batch of data, specify hive's default delimiter x01, and use wildcard * to specify all files in the directory - -LOAD LABEL example db.label3 -( -DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*") -NEGATIVE -INTO TABLE `my_table` -COLUMNS TERMINATED BY "\\x01" -); - -4. Import a batch of "negative" data - -LOAD LABEL example db.label4 -( -DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file) -NEGATIVE -INTO TABLE `my_table` -COLUMNS TERMINATED BY "\t" -); - -5. Import a batch of data and specify partitions - -LOAD LABEL example db.label5 -( -DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") -INTO TABLE `my_table` -PARTITION (p1, P2) -COLUMNS TERMINATED BY "," -(k1, k3, k2, v1, v2) -); - -6. Import a batch of data, specify partitions, and make some transformations to the columns of the imported files, as follows: -The table structure is as follows: -K1 date -date -k3 bigint -k4 varchar (20) -k5 varchar (64) -k6 int - -Assume that the data file has only one row of data, five columns, and comma-separated: - -1537002087,2018-08-09 11:12:13,1537002087,-,1 - -The columns in the data file correspond to the columns specified in the import statement: -tmp -u k1, tmp -u k2, tmp u k3, k6, v1 - -The conversion is as follows: - -1) k1: Transform tmp_k1 timestamp column into datetime type data -2) k2: Converting tmp_k2 datetime-type data into date data -3) k3: Transform tmp_k3 timestamp column into day-level timestamp -4) k4: Specify import default value of 1 -5) k5: Calculate MD5 values from tmp_k1, tmp_k2, tmp_k3 columns -6) k6: Replace the - value in the imported file with 10 - -LOAD LABEL example db.label6 -( -DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") -INTO TABLE `my_table` -PARTITION (p1, P2) -COLUMNS TERMINATED BY "," -(tmp /u k1, tmp /u k2, tmp /u k3, k6, v1) -SET ( -K1 = strftime (%Y -%m -%d%H:%M:%S ", TMP u K1), -K2 = Time = UFormat ("% Y-% M-% D% H:% M:% S", "% Y-% M-% D", "TMP = UK2), -k3 = alignment_timestamp("day", tmp_k3), -k4 = default_value("1"), -K5 = MD5Sum (TMP = UK1, TMP = UK2, TMP = UK3) -k6 = replace value ("-", "10") -) -); - -7. Import data into tables containing HLL columns, which can be columns in tables or columns in data - -LOAD LABEL example db.label7 -( -DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") -INTO TABLE `my_table` -PARTITION (p1, P2) -COLUMNS TERMINATED BY "," -SET ( -v1 = hll, u hash (k1), -v2 = hll, u hash (k2) -) -); - -## keyword -LOAD - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md deleted file mode 100644 index 0547e95b07..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md +++ /dev/null @@ -1,132 +0,0 @@ ---- -{ - "title": "MINI LOAD", - "language": "en" -} ---- - - - -# MINI LOAD -## Description - -MINI LOAD and STEAM LOAD are implemented in exactly the same way. MINI LOAD is a subset of STREAM LOAD in import support. -Subsequent imports of new features will only be supported in STEAM LOAD, MINI LOAD will no longer add features. It is suggested that STREAM LOAD be used instead. Please use HELP STREAM LOAD. - -MINI LOAD is imported through HTTP protocol. Users can import without relying on Hadoop or Mysql client. -The user describes the import through HTTP protocol, and the data is streamed into Doris in the process of receiving http requests. 
After the ** import job is completed, the ** returns to the user the imported results. - -* Note: In order to be compatible with the old version of mini load usage habits, users can still view the import results through the 'SHOW LOAD' command. - -Grammar: -Import: - -curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table}/_load?label=xxx - -View import information - -curl -u user:passwd http://host:port/api/{db}/_load_info?label=xxx - -HTTP Protocol Specification - -Privilege Authentication Currently Doris uses the Basic mode of HTTP for privilege authentication. So you need to specify a username and password when importing -This way is to pass the password in plaintext, and does not support encrypted transmission for the time being. - -Expect Doris needs to send an HTTP request with the 'Expect' header information,'100-continue'. -Why? Because we need to redirect the request, we have to transfer the data content before. -This can avoid causing multiple data transmission, thereby improving efficiency. - -Content-Length Doris needs to send a request with the header 'Content-Length'. If the content ratio is sent -'Content-Length' is less, so Doris believes that if there is a transmission problem, the submission task fails. -NOTE: If you send more data than 'Content-Length', Doris reads only 'Content-Length'. -Length content and import - - -Description of parameters: - -User: User is user_name if the user is in default_cluster. Otherwise, it is user_name@cluster_name. - -Label: The label used to specify this batch of imports for later job queries, etc. -This parameter must be passed in. - -Columns: Used to describe the corresponding column name in the import file. -If it is not passed in, the column order in the file is considered to be the same as the order in which the table is built. -The specified method is comma-separated, such as columns = k1, k2, k3, K4 - -Column_separator: Used to specify the separator between columns, default is' t' -NOTE: Url encoding is required, for example -If you need to specify '\t' as a separator, you should pass in 'column_separator=% 09' -If you need to specify 'x01'as a delimiter, you should pass in 'column_separator=% 01' -If you need to specify','as a separator, you should pass in 'column_separator=% 2c' - - -Max_filter_ratio: Used to specify the maximum percentage allowed to filter irregular data, default is 0, not allowed to filter -Custom specification should be as follows:'max_filter_ratio = 0.2', meaning that 20% error rate is allowed. - -Timeout: Specifies the timeout time of the load job in seconds. When the load execution time exceeds this threshold, it is automatically cancelled. The default timeout time is 86400 seconds. -It is recommended to specify a timeout time of less than 86400 seconds. - -Hll: Used to specify the corresponding relationship between the HLL columns in the data and the tables, the columns in the tables and the columns specified in the data. -(If columns are not specified, the columns of the data column surface can also be other non-HLL columns in the table.) By "partition" -Specify multiple HLL columns using ":" splitting, for example:'hll1, cuid: hll2, device' - -NOTE: -1. This method of importing is currently completed on a single machine, so it is not suitable to import a large amount of data. -It is recommended that the amount of data imported should not exceed 1 GB. - -2. 
Currently, it is not possible to submit multiple files in the form of `curl-T', `{file1, file2}', because curl splits them into multiple files. -Request sent, multiple requests cannot share a label number, so it cannot be used - -3. Miniload is imported in exactly the same way as streaming. It returns the results synchronously to users after the import of streaming is completed. -Although the information of mini load can be found in subsequent queries, it cannot be operated on. The queries are only compatible with the old ways of use. - -4. When importing from the curl command line, you need to add escape before & or the parameter information will be lost. - -'35;'35; example - -1. Import the data from the local file 'testData' into the table of 'testTbl' in the database 'testDb'(the user is in default_cluster) -curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123 - -2. Import the data from the local file 'testData' into the table of 'testTbl' in the database'testDb'(the user is in test_cluster). The timeout time is 3600 seconds. -curl --location-trusted -u root@test_cluster:root -T testData http://fe.host:port/api/testDb/testTbl/_load?label=123&timeout=3600 - -3. Import data from the local file 'testData' into the 'testTbl' table in the database 'testDb', allowing a 20% error rate (the user is in default_cluster) -curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 - -4. Import the data from the local file 'testData' into the table 'testTbl' in the database 'testDb', allowing a 20% error rate, and specify the column name of the file (the user is in default_cluster) -curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2\&columns=k1,k2,k3 - -5. Import in streaming mode (user is in default_cluster) -seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_load?label=123 - -6. Import tables containing HLL columns, which can be columns in tables or columns in data to generate HLL columns (users are in default_cluster) - - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2\&hll=hll_column1,k1:hll_column2,k2 - \&columns=k1,k2,k3 - - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 - \&hll=hll_column1,tmp_k4:hll_column2,tmp_k5\&columns=k1,k2,k3,tmp_k4,tmp_k5 - -7. 
View imports after submission - -curl -u root http://host:port/api/testDb/_load_info?label=123 - -## keyword -MINI, LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md deleted file mode 100644 index 78ff3edc20..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -{ - "title": "MULTI LOAD", - "language": "en" -} ---- - - - -# MULTI LOAD -## Description - -Syntax: -curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_start?label=xxx -curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table1}/_load?label=xxx\&sub_label=yyy -curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table2}/_load?label=xxx\&sub_label=zzz -curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_commit?label=xxx -curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_desc?label=xxx - -'MULTI LOAD'can support users to import multiple tables at the same time on the basis of'MINI LOAD'. The specific commands are shown above. -'/api/{db}/_multi_start' starts a multi-table import task -'/api/{db}/{table}/_load' adds a table to be imported to an import task. The main difference from 'MINI LOAD' is that the 'sub_label' parameter needs to be passed in. -'/api/{db}/_multi_commit' submits the entire multi-table import task and the background begins processing -'/api/{db}/_multi_abort' Abandons a multi-table import task -'/api/{db}/_multi_desc' shows the number of jobs submitted by a multi-table import task - -HTTP Protocol Specification -Privilege Authentication Currently Doris uses the Basic mode of HTTP for privilege authentication. So you need to specify a username and password when importing -This way is to pass passwords in plaintext, since we are all in the Intranet environment at present... - -Expect Doris needs to send an HTTP request, and needs the 'Expect' header information with the content of'100-continue'. -Why? Because we need to redirect the request, we have to transfer the data content before. -This can avoid causing multiple data transmission, thereby improving efficiency. - -Content-Length Doris needs to send a request with the header 'Content-Length'. If the content ratio is sent -If'Content-Length'is less, Palo believes that if there is a transmission problem, the submission of the task fails. -NOTE: If you send more data than 'Content-Length', Doris reads only 'Content-Length'. -Length content and import - -Description of parameters: -User: User is user_name if the user is in default_cluster. Otherwise, it is user_name@cluster_name. - -Label: Used to specify the label number imported in this batch for later job status queries, etc. -This parameter must be passed in. - -Sub_label: Used to specify a subversion number within a multi-table import task. For multi-table imported loads, this parameter must be passed in. - -Columns: Used to describe the corresponding column name in the import file. -If it is not passed in, the column order in the file is considered to be the same as the order in which the table is built. -The specified method is comma-separated, such as columns = k1, k2, k3, K4 - -Column_separator: Used to specify the separator between columns, default is' t' -NOTE: Url encoding is required, such as specifying '\t'as a delimiter. 
-Then you should pass in 'column_separator=% 09' - -Max_filter_ratio: Used to specify the maximum percentage allowed to filter irregular data, default is 0, not allowed to filter -Custom specification should be as follows:'max_filter_ratio = 0.2', meaning that 20% error rate is allowed. -Pass in effect at'_multi_start' - -NOTE: -1. This method of importing is currently completed on a single machine, so it is not suitable to import a large amount of data. -It is recommended that the amount of data imported should not exceed 1GB - -2. Currently, it is not possible to submit multiple files in the form of `curl-T', `{file1, file2}', because curl splits them into multiple files. -Request sent, multiple requests cannot share a label number, so it cannot be used - -3. Supports streaming-like ways to use curl to import data into Doris, but Doris will have to wait until the streaming is over -Real import behavior will occur, and the amount of data in this way cannot be too large. - -'35;'35; example - -1. Import the data from the local file 'testData1'into the table of 'testTbl1' in the database 'testDb', and -Import the data from 'testData2'into the table 'testTbl2' in 'testDb'(the user is in default_cluster) -curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 -curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 -curl --location-trusted -u root -T testData2 http://host:port/api/testDb/testTbl2/_load?label=123\&sub_label=2 -curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_commit?label=123 - -2. Multi-table Import Midway Abandon (User in default_cluster) -curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 -curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 -curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_abort?label=123 - -3. Multi-table import to see how much content has been submitted (user is in default_cluster) -curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 -curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 -curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_desc?label=123 - -## keyword -MULTI, MINI, LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/OUTFILE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/OUTFILE.md deleted file mode 100644 index 9a97dffbf6..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/OUTFILE.md +++ /dev/null @@ -1,207 +0,0 @@ ---- -{ - "title": "OUTFILE", - "language": "zh-CN" -} ---- - - - -# OUTFILE -## description - - The `SELECT INTO OUTFILE` statement can export the query results to a file. Currently supports export to remote storage through Broker process, or directly through S3, HDFS protocol such as HDFS, S3, BOS and COS(Tencent Cloud) through the Broker process. The syntax is as follows: - - Grammar: - query_stmt - INTO OUTFILE "file_path" - [format_as] - [properties] - - 1. file_path - `file_path` specify the file path and file name prefix. Like: `hdfs://path/to/my_file_`. - The final file name will be assembled as `my_file_`, file seq no and the format suffix. File seq no starts from 0, determined by the number of split. - my_file_abcdefg_0.csv - my_file_abcdefg_1.csv - my_file_abcdegf_2.csv - - 2. 
format_as - FORMAT AS CSV - Specify the export format. The default is CSV. - - - 3. properties - Specify the relevant attributes. Currently it supports exporting through the Broker process, or through the S3, HDFS protocol. - - Grammar: - [PROPERTIES ("key"="value", ...)] - The following parameters can be specified: - column_separator: Specifies the exported column separator, defaulting to t. Supports invisible characters, such as'\x07'. - line_delimiter: Specifies the exported line separator, defaulting to\n. Supports invisible characters, such as'\x07'. - max_file_size: max size for each file - - Broker related attributes need to be prefixed with `broker.`: - broker.name: broker name - broker.hadoop.security.authentication: Specify authentication as kerberos - broker.kerberos_principal: Specify the principal of kerberos - broker.kerberos_keytab: Specify the keytab path of kerberos, this file is the path on the broker. - - HDFS protocal can directly execute HDFS protocal configuration: - hdfs.fs.defaultFS: namenode ip and port - hdfs.hdfs_user: hdfs user name - - S3 protocol can directly execute S3 protocol configuration: - AWS_ENDPOINT - AWS_ACCESS_KEY - AWS_SECRET_KEY - AWS_REGION - -## example - - 1. Export simple query results to the file `hdfs://path/to/result.txt`. Specify the export format as CSV. Use `my_broker` and set kerberos authentication information. Specify the column separator as `,` and the line delimiter as `\n`. - SELECT * FROM tbl - INTO OUTFILE "hdfs://path/to/result_" - FORMAT AS CSV - PROPERTIES - ( - "broker.name" = "my_broker", - "broker.hadoop.security.authentication" = "kerberos", - "broker.kerberos_principal" = "doris@YOUR.COM", - "broker.kerberos_keytab" = "/home/doris/my.keytab", - "column_separator" = ",", - "line_delimiter" = "\n", - "max_file_size" = "100MB" - ); - If the result is less than 100MB, file will be: `result_0.csv`. - If larger than 100MB, may be: `result_0.csv, result_1.csv, ...`. - - 2. Export simple query results to the file `hdfs://path/to/result.parquet`. Specify the export format as PARQUET. Use `my_broker` and set kerberos authentication information. - SELECT c1, c2, c3 FROM tbl - INTO OUTFILE "hdfs://path/to/result_" - FORMAT AS PARQUET - PROPERTIES - ( - "broker.name" = "my_broker", - "broker.hadoop.security.authentication" = "kerberos", - "broker.kerberos_principal" = "doris@YOUR.COM", - "broker.kerberos_keytab" = "/home/doris/my.keytab", - "schema"="required,int32,c1;required,byte_array,c2;required,byte_array,c2" - ); - If the exported file format is PARQUET, `schema` must be specified. - - 3. Export the query result of the CTE statement to the file `hdfs://path/to/result.txt`. The default export format is CSV. Use `my_broker` and set hdfs high availability information. Use the default column separators and line delimiter. - WITH - x1 AS - (SELECT k1, k2 FROM tbl1), - x2 AS - (SELECT k3 FROM tbl2) - SELEC k1 FROM x1 UNION SELECT k3 FROM x2 - INTO OUTFILE "hdfs://path/to/result_" - PROPERTIES - ( - "broker.name" = "my_broker", - "broker.username"="user", - "broker.password"="passwd", - "broker.dfs.nameservices" = "my_ha", - "broker.dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2", - "broker.dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port", - "broker.dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port", - "broker.dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ); - If the result is less than 1GB, file will be: `result_0.csv`. 
- If larger than 1GB, may be: `result_0.csv, result_1.csv, ...`. - - 4. Export the query results of the UNION statement to the file `bos://bucket/result.parquet`. Specify the export format as PARQUET. Use `my_broker` and set hdfs high availability information. PARQUET format does not need to specify the column separator and line delimiter. - SELECT k1 FROM tbl1 UNION SELECT k2 FROM tbl1 - INTO OUTFILE "bos://bucket/result_" - FORMAT AS PARQUET - PROPERTIES - ( - "broker.name" = "my_broker", - "broker.bos_endpoint" = "http://bj.bcebos.com", - "broker.bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "broker.bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyyyyyyyy", - "schema"="required,int32,k1;required,byte_array,k2" - ); - - 5. Export simple query results to the file `cos://${bucket_name}/path/result.txt`. Specify the export format as CSV. - And create a mark file after export finished. - select k1,k2,v1 from tbl1 limit 100000 - into outfile "s3a://my_bucket/export/my_file_" - FORMAT AS CSV - PROPERTIES - ( - "broker.name" = "hdfs_broker", - "broker.fs.s3a.access.key" = "xxx", - "broker.fs.s3a.secret.key" = "xxxx", - "broker.fs.s3a.endpoint" = "https://cos.xxxxxx.myqcloud.com/", - "column_separator" = ",", - "line_delimiter" = "\n", - "max_file_size" = "1024MB", - "success_file_name" = "SUCCESS" - ) - Please Note: - 1. Paths that do not exist are automatically created. - 2. These parameters(access.key/secret.key/endpointneed) need to be confirmed with `Tecent Cloud COS`. In particular, the value of endpoint does not need to be filled in bucket_name. - - 6. Use the s3 protocol to export to bos, and concurrent export is enabled. - set enable_parallel_outfile = true; - select k1 from tb1 limit 1000 - into outfile "s3://my_bucket/export/my_file_" - format as csv - properties - ( - "AWS_ENDPOINT" = "http://s3.bd.bcebos.com", - "AWS_ACCESS_KEY" = "xxxx", - "AWS_SECRET_KEY" = "xxx", - "AWS_REGION" = "bd" - ) - The final generated file prefix is `my_file_{fragment_instance_id}_`. - - 7. Use the s3 protocol to export to bos, and enable concurrent export of session variables. - set enable_parallel_outfile = true; - select k1 from tb1 order by k1 limit 1000 - into outfile "s3://my_bucket/export/my_file_" - format as csv - properties - ( - "AWS_ENDPOINT" = "http://s3.bd.bcebos.com", - "AWS_ACCESS_KEY" = "xxxx", - "AWS_SECRET_KEY" = "xxx", - "AWS_REGION" = "bd" - ) - But because the query statement has a top-level sorting node, even if the query is enabled for concurrently exported session variables, it cannot be exported concurrently. - - 8. Use libhdfs to export to hdfs cluster. Export the query results of the UNION statement to the file `hdfs://path/to/result.txt` - Specify the export format as CSV. Use the user name as 'work', the column separators as ',' and line delimiter as '\n'. - SELECT * FROM tbl - INTO OUTFILE "hdfs://path/to/result_" - FORMAT AS CSV - PROPERTIES - ( - "hdfs.fs.defaultFS" = "hdfs://ip:port", - "hdfs.hdfs_user" = "work" - ); - If the result is less than 1GB, file will be: `my_file_0.csv`. - If larger than 1GB, may be: `my_file_0.csv, result_1.csv, ...`. 
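The following consolidated sketch is offered as an additional illustration rather than one of the numbered examples above. It assumes that the CSV-related properties shown with the broker form (column_separator, line_delimiter, max_file_size, success_file_name) can be combined with the S3-protocol form in the same way; the bucket, endpoint, keys and region are placeholders.

    ```
    set enable_parallel_outfile = true;
    select k1, k2, v1 from tbl1
    into outfile "s3://my_bucket/export/my_file_"
    format as csv
    properties
    (
        "AWS_ENDPOINT" = "http://s3.bd.bcebos.com",
        "AWS_ACCESS_KEY" = "xxxx",
        "AWS_SECRET_KEY" = "xxx",
        "AWS_REGION" = "bd",
        "column_separator" = ",",
        "line_delimiter" = "\n",
        "max_file_size" = "1024MB",
        "success_file_name" = "SUCCESS"
    );
    ```

As in example 6, each concurrently written file is prefixed with `my_file_{fragment_instance_id}_`, and the SUCCESS marker file is only created after the export has finished, so its presence indicates that the result set is complete.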
-
-## keyword
-    OUTFILE
-
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md
deleted file mode 100644
index 92c157a434..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-{
-    "title": "PAUSE ROUTINE LOAD",
-    "language": "en"
-}
----
-
-
-# PAUSE ROUTINE LOAD
-## example
-
-1. Pause the routine load job named test1.
-
-    PAUSE ROUTINE LOAD FOR test1;
-
-2. Pause all running routine load jobs.
-
-    PAUSE ALL ROUTINE LOAD;
-
-## keyword
-
-    PAUSE,ALL,ROUTINE,LOAD
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md
deleted file mode 100644
index b685270649..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md
+++ /dev/null
@@ -1,48 +0,0 @@
----
-{
-    "title": "PAUSE SYNC JOB",
-    "language": "en"
-}
----
-
-
-# PAUSE SYNC JOB
-
-## description
-
-Pauses a running SyncJob in the database.
-
-The paused job will stop synchronizing and keep the latest consumption location until it is resumed by the user.
-
-Syntax:
-
-    PAUSE SYNC JOB [db.]job_name
-
-## example
-
-1. Pause the SyncJob named `job_name`.
-
-    PAUSE SYNC JOB `job_name`;
-
-## keyword
-    PAUSE,SYNC,JOB,BINLOG
-
-
\ No newline at end of file
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md b/docs/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md
deleted file mode 100644
index e49f931864..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/RESTORE TABLET.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-{
-    "title": "RESTORE TABLET",
-    "language": "en"
-}
----
-
-
-# RESTORE TABLET
-Description
-
-This function is used to recover tablet data that was deleted by mistake and moved to the trash directory.
-
-Note: For the time being, this function is only provided as an HTTP interface on the BE service. To use it,
-send a restore tablet API request to the HTTP port of the BE node for data recovery. The API format is as follows:
-Method: POST
-URI: http://be_host:be_http_port/api/restore_tablet?tablet_id=xxx&schema_hash=xxx
-
-## example
-
-curl -X POST "http://hostname:8088/api/restore_tablet?tablet_id=123456&schema_hash=1111111"
-
-## keyword
-RESTORE,TABLET
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md
deleted file mode 100644
index 26a499b1dc..0000000000
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md
+++ /dev/null
@@ -1,40 +0,0 @@
----
-{
-    "title": "RESUME ROUTINE LOAD",
-    "language": "en"
-}
----
-
-
-# RESUME ROUTINE LOAD
-## example
-
-1. Resume the routine load job named test1.
-
-    RESUME ROUTINE LOAD FOR test1;
-
-2. Resume all paused routine load jobs.
- - RESUME ALL ROUTINE LOAD; - -## keyword - - RESUME,ALL,ROUTINE,LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md deleted file mode 100644 index ef5f7f851a..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -{ - "title": "RESUME SYNC JOB", - "language": "en" -} ---- - - - -# RESUME SYNC JOB - -## description - -Rusumes a paused SyncJob in the database. - -The job will continue to synchronize data from the latest location before the last pause. - -Syntax: - - RESUME SYNC JOB [db.]job_name - -## example - -1. Resume the SyncJob named `job_name` - - RESUME SYNC JOB `job_name`; - -## keyword - RESUME,SYNC,JOB,BINLOG \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md deleted file mode 100644 index c695f2a4a1..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md +++ /dev/null @@ -1,588 +0,0 @@ ---- -{ - "title": "ROUTINE LOAD", - "language": "en" -} ---- - - - -# ROUTINE LOAD -## description - -Routine Load function allows users to submit a resident load task, and continuously load data into Doris by continuously reading data from the specified data source. Currently, only text data format (CSV) data is loaded from Kafka by means of no authentication or SSL authentication. - -Syntax: - -``` -CREATE ROUTINE LOAD [db.]job_name ON tbl_name -[merge_type] -[load_properties] -[job_properties] -FROM data_source -[data_source_properties] -``` - -1. [db.]job_name - - The name of the load job, in the same database, only one job can run with the same name. - -2. tbl_name - - Specifies the name of the table that needs to be loaded. - -3. merge_type - - The type of data merging supports three types: APPEND, DELETE, and MERGE. APPEND is the default value, which means that all this batch of data needs to be appended to the existing data. DELETE means to delete all rows with the same key as this batch of data. MERGE semantics Need to be used in conjunction with the delete condition, which means that the data that meets the delete on condition is processed according to DELETE semantics and the rest is processed according to APPEND semantics - -4. load_properties - - Used to describe the load data. grammar: - - ``` - [column_separator], - [columns_mapping], - [where_predicates], - [delete_on_predicates] - [partitions], - [preceding_predicates] - ``` - - 1. column_separator: - - Specify column separators, such as: - - `COLUMNS TERMINATED BY ","` - - The default is: `\t` - - 2. columns_mapping: - - Specifies the mapping of columns in the source data and defines how the derived columns are generated. - - 1. Map column: - - Specify in order, which columns in the source data correspond to which columns in the destination table. For columns that you want to skip, you can specify a column name that does not exist. - - Suppose the destination table has three columns k1, k2, v1. The source data has 4 columns, of which columns 1, 2, and 4 correspond to k2, k1, and v1, respectively. Write as follows: - - `COLUMNS (k2, k1, xxx, v1)` - - Where xxx is a column that does not exist and is used to skip the third column in the source data. - - 2. Derived columns: - - A column represented in the form of col_name = expr, which we call a derived column. 
That is, the value of the corresponding column in the destination table is calculated by expr. - - Derived columns are usually arranged after the mapped column. Although this is not mandatory, Doris always parses the mapped columns first and then parses the derived columns. - - Following an example, assume that the destination table also has column 4, v2, which is generated by the sum of k1 and k2. You can write as follows: - - `COLUMNS (k2, k1, xxx, v1, v2 = k1 + k2);` - - 3. where_predicates - - Used to specify filter criteria to filter out unwanted columns. Filter columns can be either mapped columns or derived columns. - - For example, if we only want to load a column with k1 greater than 100 and k2 equal to 1000, we would write as follows: - - `WHERE k1 > 100 and k2 = 1000` - - 4. partitions - - Specifies which partitions of the load destination table. If not specified, it will be automatically loaded into the corresponding partition. - - Example: - - `PARTITION(p1, p2, p3)` - - 5. delete_on_predicates: - - Only used when merge type is MERGE - - 6. preceding_predicates - - Used to filter original data. The original data is the data without column mapping and transformation. The user can filter the data before conversion, select the desired data, and then perform the conversion. - -5. job_properties - - A generic parameter that specifies a routine load job. - - syntax: - - ``` - PROPERTIES ( - "key1" = "val1", - "key2" = "val2" - ) - ``` - - Currently we support the following parameters: - - 1. `desired_concurrent_number` - - The degree of concurrency desired. A routine load job is split into multiple subtasks. This parameter specifies how many tasks can be executed simultaneously in a job. Must be greater than 0. The default is 3. - - This concurrency is not the actual concurrency. The actual concurrency will be considered by the number of nodes in the cluster, the load, and the data source. - - example: - - `"desired_concurrent_number" = "3"` - - 2. `max_batch_interval/max_batch_rows/max_batch_size` - - These three parameters represent: - - 1) The maximum execution time of each subtask, in seconds. The range is 5 to 60. The default is 10. - - 2) The maximum number of rows read per subtask. Must be greater than or equal to 200,000. The default is 200000. - - 3) The maximum number of bytes read per subtask. The unit is byte and the range is 100MB to 1GB. The default is 100MB. - - These three parameters are used to control the execution time and throughput of a subtask. When either one reaches the threshold, the task ends. - - example: - - ``` - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200" - ``` - - 3. `max_error_number` - - The maximum number of error lines allowed in the sampling window. Must be greater than or equal to 0. The default is 0, which means that no error lines are allowed. - - The sampling window is max_batch_rows * 10. That is, if the number of error lines is greater than max_error_number in the sampling window, the routine job will be suspended, and manual intervention is required to check the data quality problem. - - Lines that are filtered by the where condition are not counted as error lines. - - 4. `strict_mode` - - Whether to enable strict mode, the default is disabled. If turned on, the column type transformation of non-null raw data is filtered if the result is NULL. Specified as "strict_mode" = "true" - - 5. `timezone` - - Specifies the time zone in which the job will be loaded. 
The default by using session variable's timezone. This parameter affects all function results related to the time zone involved in the load. - - 6. `format` - - Specifies the format of the imported data. Support csv and json, the default is csv. - - 7. `jsonpaths` - - There are two ways to import json: simple mode and matched mode. If jsonpath is set, it will be the matched mode import, otherwise it will be the simple mode import, please refer to the example for details. - - 8. `strip_outer_array` - Boolean type, true to indicate that json data starts with an array object and flattens objects in the array object, default value is false. - - 9. `json_root` - json_root is a valid JSONPATH string that specifies the root node of the JSON Document. The default value is "". - - 10. `send_batch_parallelism` - Integer, Used to set the default parallelism for sending batch, if the value for parallelism exceed `max_send_batch_parallelism_per_job` in BE config, then the coordinator BE will use the value of `max_send_batch_parallelism_per_job`. - - 11. `load_to_single_tablet` - Boolean type, True means that one task can only load data to one tablet in the corresponding partition at a time. The default value is false. This parameter can only be set when loading data into the OLAP table with random partition. - -6. data_source - - The type of data source. Current support: - - KAFKA - -7. `data_source_properties` - - Specify information about the data source. - - syntax: - - ``` - ( - "key1" = "val1", - "key2" = "val2" - ) - ``` - - 1. KAFKA data source - - `Kafka_broker_list` - - Kafka's broker connection information. The format is ip:host. Multiple brokers are separated by commas. - - Example: - - `"kafka_broker_list" = "broker1:9092,broker2:9092"` - - 2. `kafka_topic` - - Specify the topic of Kafka to subscribe to. - - Example: - - `"kafka_topic" = "my_topic"` - - 3. `kafka_partitions/kafka_offsets` - - Specify the kafka partition to be subscribed to, and the corresponding star offset for each partition. - - Offset can specify a specific offset from 0 or greater, or: - - 1) OFFSET_BEGINNING: Subscribe from the location where the data is available. - - 2) OFFSET_END: Subscribe from the end. - - 3) Timestamp, the format must be like: "2021-05-11 10:00:00", the system will automatically locate the offset of the first message greater than or equal to the timestamp. - Note that the offset of the timestamp format cannot be mixed with the number type, only one of them can be selected. - - If not specified, all partitions under topic are subscribed by default fromSET_END. - - Example: - - ``` - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,OFFSET_BEGINNING,OFFSET_END" - - "kafka_partitions" = "0,1", - "kafka_offsets" = "2021-05-11 10:00:00, 2021-05-11 11:00:00" - ``` - - 4. property - - Specify custom kafka parameters. - - The function is equivalent to the "--property" parameter in the kafka shel - - When the value of the parameter is a file, you need to add the keyword: "FILE" before the value. - - For information on how to create a file, see "HELP CREATE FILE;" - - For more supported custom parameters, see the configuration items on the nt side in the official CONFIGURATION documentation for librdkafka. - - Example: - - ``` - "property.client.id" = "12345", - "property.ssl.ca.location" = "FILE:ca.pem" - ``` - - 1. 
When connecting to Kafka using SSL, you need to specify the following parameters: - - ``` - "property.security.protocol" = "ssl", - "property.ssl.ca.location" = "FILE:ca.pem", - "property.ssl.certificate.location" = "FILE:client.pem", - "property.ssl.key.location" = "FILE:client.key", - "property.ssl.key.password" = "abcdefg" - ``` - - among them: - - "property.security.protocol" and "property.ssl.ca.location" are required to indicate the connection method is SSL and the location of the CA certificate. - - If the client authentication is enabled on the Kafka server, you also need to set: - - ``` - "property.ssl.certificate.location" - "property.ssl.key.location" - "property.ssl.key.password" - ``` - - Used to specify the public key of the client, the private key, and the word of the private key. - - 2. Specify the default starting offset for kafka partition - - If kafka_partitions/kafka_offsets is not specified, all partitions are unanmed by default, and you can specify kafka_default_offsets to specify the star offset. The default is OFFSET_END, which starts at the end of the subscription. - - Values: - - 1) OFFSET_BEGINNING: Subscribe from the location where the data is available. - - 2) OFFSET_END: Subscribe from the end. - - 3) Timestamp, the format is the same as kafka_offsets - - Example: - - `"property.kafka_default_offsets" = "OFFSET_BEGINNING"` - `"property.kafka_default_offsets" = "2021-05-11 10:00:00"` - -8. load data format sample - - Integer class (TINYINT/SMALLINT/INT/BIGINT/LARGEINT): 1, 1000, 1234 - - Floating point class (FLOAT/DOUBLE/DECIMAL): 1.1, 0.23, .356 - - Date class (DATE/DATETIME): 2017-10-03, 2017-06-13 12:34:03. - - String class (CHAR/VARCHAR) (without quotes): I am a student, a - - NULL value: \N - -## example - -1. Create a Kafka routine load task named test1 for the example_tbl of example_db. Specify group.id and client.id, and automatically consume all partitions by default, with subscriptions starting at the end (OFFSET_END) - ``` - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100) - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "property.group.id" = "xxx", - "property.client.id" = "xxx" - ); - ``` - -2. Create a Kafka routine load task named test1 for the example_tbl of example_db. The load task is in strict mode. - - ``` - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100), - WHERE k1 > 100 and k2 like "%doris%" - PROPERTIES - ( -     "desired_concurrent_number"="3", -     "max_batch_interval" = "20", -     "max_batch_rows" = "300000", -     "max_batch_size" = "209715200", -     "strict_mode" = "false" - ) - FROM KAFKA - ( -     "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", -     "kafka_topic" = "my_topic", -     "kafka_partitions" = "0,1,2,3", -     "kafka_offsets" = "101,0,0,200" - ); - ``` - -3. load data from Kafka clusters via SSL authentication. Also set the client.id parameter. 
The load task is in non-strict mode and the time zone is Africa/Abidjan - - ``` - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100), - WHERE k1 > 100 and k2 like "%doris%" - PROPERTIES - ( -     "desired_concurrent_number"="3", -     "max_batch_interval" = "20", -     "max_batch_rows" = "300000", -     "max_batch_size" = "209715200", -     "strict_mode" = "false", -     "timezone" = "Africa/Abidjan" - ) - FROM KAFKA - ( -     "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", -     "kafka_topic" = "my_topic", -     "property.security.protocol" = "ssl", -     "property.ssl.ca.location" = "FILE:ca.pem", -     "property.ssl.certificate.location" = "FILE:client.pem", -     "property.ssl.key.location" = "FILE:client.key", -     "property.ssl.key.password" = "abcdefg", -     "property.client.id" = "my_client_id" - ); - ``` - -4. Create a Kafka routine load task named test1 for the example_tbl of example_db. The load data is a simple json. - - ``` - CREATE ROUTINE LOAD example_db.test_json_label_1 ON table1 - COLUMNS(category,price,author) - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false", - "format" = "json" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2", - "kafka_offsets" = "0,0,0" - ); - ``` - It support two kinds data style: - 1){"category":"a9jadhx","author":"test","price":895} - 2)[ - {"category":"a9jadhx","author":"test","price":895}, - {"category":"axdfa1","author":"EvelynWaugh","price":1299} - ] - -5. Matched load json by jsonpaths. - - ``` - CREATE TABLE `example_tbl` ( - `category` varchar(24) NULL COMMENT "", - `author` varchar(24) NULL COMMENT "", - `timestamp` bigint(20) NULL COMMENT "", - `dt` int(11) NULL COMMENT "", - `price` double REPLACE - ) ENGINE=OLAP - AGGREGATE KEY(`category`,`author`,`timestamp`,`dt`) - COMMENT "OLAP" - PARTITION BY RANGE(`dt`) - (PARTITION p0 VALUES [("-2147483648"), ("20200509")), - PARTITION p20200509 VALUES [("20200509"), ("20200510")), - PARTITION p20200510 VALUES [("20200510"), ("20200511")), - PARTITION p20200511 VALUES [("20200511"), ("20200512"))) - DISTRIBUTED BY HASH(`category`,`author`,`timestamp`) BUCKETS 4 - PROPERTIES ( - "replication_num" = "1" - ); - - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(category, author, price, timestamp, dt=from_unixtime(timestamp, '%Y%m%d')) - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false", - "format" = "json", - "jsonpaths" = "[\"$.category\",\"$.author\",\"$.price\",\"$.timestamp\"]", - "strip_outer_array" = "true" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2", - "kafka_offsets" = "0,0,0" - ); - ``` - For example json data: - [ - {"category":"11","title":"SayingsoftheCentury","price":895,"timestamp":1589191587}, - {"category":"22","author":"2avc","price":895,"timestamp":1589191487}, - {"category":"33","author":"3avc","title":"SayingsoftheCentury","timestamp":1589191387} - ] - - Tips: - 1)If the json data starts as an array and each object in the array is a record, you need to set the strip_outer_array to true to represent the flat array. 
- 2)If the json data starts with an array, and each object in the array is a record, our ROOT node is actually an object in the array when we set jsonpath. - -6. User specifies the json_root node - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(category, author, price, timestamp, dt=from_unixtime(timestamp, '%Y%m%d')) - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false", - "format" = "json", - "jsonpaths" = "[\"$.category\",\"$.author\",\"$.price\",\"$.timestamp\"]", - "strip_outer_array" = "true", - "json_root" = "$.RECORDS" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2", - "kafka_offsets" = "0,0,0" - ); - For example json data: - { - "RECORDS":[ - {"category":"11","title":"SayingsoftheCentury","price":895,"timestamp":1589191587}, - {"category":"22","author":"2avc","price":895,"timestamp":1589191487}, - {"category":"33","author":"3avc","title":"SayingsoftheCentury","timestamp":1589191387} - ] - } - - 7. Create a Kafka routine load task named test1 for the example_tbl of example_db. delete all data key columns match v3 >100 key columns. - - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - WITH MERGE - COLUMNS(k1, k2, k3, v1, v2, v3), - WHERE k1 > 100 and k2 like "%doris%", - DELETE ON v3 >100 - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,0,200" - ); - - 8. Filter original data - - CREATE ROUTINE LOAD example_db.test_job ON example_tbl - COLUMNS TERMINATED BY ",", - COLUMNS(k1,k2,source_sequence,v1,v2), - PRECEDING FILTER k1 > 2 - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "30", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200" - ) FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,0,200" - ); - - 9. Start consumption from the specified point in time - - CREATE ROUTINE LOAD example_db.test_job ON example_tbl - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "30", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200" - ) FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "property.kafka_default_offsets" = "2021-10-10 11:00:00" - ); - -## keyword - - CREATE, ROUTINE, LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md deleted file mode 100644 index 37fb8f3192..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -{ - "title": "SHOW ALTER", - "language": "en" -} ---- - - - -# SHOW ALTER -## Description -This statement is used to show the execution of various modification tasks currently under way. -Grammar: -SHOW ALTER [CLUSTER | TABLE [COLUMN | ROLLUP] [FROM db_name]]; - -Explain: -TABLE COLUMN: Shows the task of alter table column. 
- Support grammar [WHERE TableName|CreateTime|FinishTime|State] [ORDER BY] [LIMIT] -TABLE ROLLUP: Shows the task of creating or deleting ROLLUP index -If db_name is not specified, use the current default DB -CLUSTER: Show the cluster operation related tasks (only administrators use! To be realized... - -## example -1. Show the task execution of all modified columns of default DB -SHOW ALTER TABLE COLUMN; - -2. Show the last task execution of modified columns of some table -SHOW ALTER TABLE COLUMN WHERE TableName = "table1" ORDER BY CreateTime LIMIT 1; - -3. Show the execution of tasks to create or delete ROLLUP index for specified DB -SHOW ALTER TABLE ROLLUP FROM example_db; - -4. Show cluster operations related tasks (only administrators use! To be realized... -SHOW ALTER CLUSTER; - -## keyword -SHOW,ALTER - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md deleted file mode 100644 index be076aa55e..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -{ - "title": "SHOW BACKUP", - "language": "en" -} ---- - - - -# SHOW BACKUP -## Description -This statement is used to view BACKUP tasks -Grammar: -SHOW BACKUP [FROM db_name] - -Explain: -1. Only the last BACKUP task is saved in Palo. -2. Each column has the following meanings: -JobId: Unique job ID -SnapshotName: The name of the backup -DbName: Subordinate database -State: Current phase -PENDING: The initial state after submitting a job -SNAPSHOTTING: In the execution snapshot -UPLOAD_SNAPSHOT: Snapshot completed, ready for upload -UPLOADING: Snapshot uploading -SAVE_META: Save job meta-information as a local file -UPLOAD_INFO: Upload job meta-information -FINISHED: Operation Successful -CANCELLED: Job Failure -Backup Objs: Backup tables and partitions -CreateTime: Task submission time -Snapshot Finished Time: Snapshot completion time -Upload Finished Time: Snapshot Upload Completion Time -FinishedTime: Job End Time -Unfinished Tasks: The unfinished sub-task ID is displayed in the SNAPSHOTTING and UPLOADING phases -Status: Display failure information if the job fails -Timeout: Job timeout, per second - -## example -1. See the last BACKUP task under example_db. -SHOW BACKUP FROM example_db; - -## keyword -SHOW, BACKUP diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md deleted file mode 100644 index 7cbdf0ce51..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -{ - "title": "SHOW CREATE FUNCTION", - "language": "en" -} ---- - - - -# SHOW CREATE FUNCTION -## description - The statement is used to show the creation statement of user-defined function - grammar: - SHOW CREATE FUNTION function_name(arg_type [, ...]) [FROM db_name]]; - - Description: - `function_name`: the name of the function to be displayed - `arg_type`: the parameter list of the function to be displayed - If you do not specify db_name, use the current default db - -## example - 1. 
Show the creation statement of the specified function under the default db - SHOW CREATE FUNCTION my_add(INT, INT) - -## keyword - SHOW,CREATE,FUNCTION \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md deleted file mode 100644 index 80df442139..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ -"title": "SHOW CREATE ROUTINE LOAD", -"language": "en" -} ---- - - - -# SHOW CREATE ROUTINE LOAD -## description - The statement is used to show the routine load job creation statement of user-defined. - - The kafka partition and offset in the result show the currently consumed partition and the corresponding offset to be consumed. - - grammar: - SHOW [ALL] CREATE ROUTINE LOAD for load_name; - - Description: - `ALL`: optional,Is for getting all jobs, including history jobs - `load_name`: routine load name - -## example - 1. Show the creation statement of the specified routine load under the default db - SHOW CREATE ROUTINE LOAD for test_load - -## keyword - SHOW,CREATE,ROUTINE,LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md deleted file mode 100644 index c7511f338e..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -{ - "title": "SHOW DATA SKEW", - "language": "en" -} ---- - - - -# SHOW DATA SKEW -## description - - This statement is used to view the data skew of a table or a partition. - - grammar: - - SHOW DATA SKEW FROM [db_name.]tbl_name [PARTITION (p1)]; - - Description: - - 1. Only one partition must be specified. For non-partitioned tables, the partition name is the same as the table name. - 2. The result will show the data volume of each bucket under the specified partition, and the proportion of the data volume of each bucket in the total data volume. - -## example - - 1. View the data skew of the table - - SHOW DATA SKEW FROM db1.test PARTITION(p1); - -## keyword - - SHOW, DATA, SKEW - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md deleted file mode 100644 index 71d7e8c920..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -{ - "title": "SHOW DATA", - "language": "en" -} ---- - - - -# SHOW DATA - -## Description - -This statement is used to show the amount of data, the number of replica and num of rows. - -Syntax: - -``` -SHOW DATA [FROM db_name[.table_name]] [ORDER BY ...]; -``` - -Explain: - -1. If the FROM clause is not specified, the amount of data and the number of copies subdivided into each table under the current db are displayed. The data volume is the total data volume of all replicas. The number of replicas is of all partitions of the table and all materialized views. - -2. If the FROM clause is specified, the amount of data, the number of replicas, and the number of statistical rows subdivided into individual materialized views under table are displayed. The data volume is the total data volume of all replicas. The number of replicas is corresponding to all partitions of the materialized view. 
The number of statistical rows is corresponding to all partitions of the materialized view. - -3. When counting the number of rows, the replica with the largest number of rows among multiple replicas shall prevail. - -4. The `Total` row in the result set represents the summary row. The `Quota` row indicates the current quota of the database. The `Left` line indicates the remaining quota. - -5. If you want to check the size of each Partition, please refer to `help show partitions`. - -6. Arbitrary column combinations can be sorted using ORDER BY. - -## example - -1. Display the data volume, replica size, aggregate data volume and aggregate replica count of each table of default DB. - - ``` - SHOW DATA; - ``` - - ``` - +-----------+-------------+--------------+ - | TableName | Size | ReplicaCount | - +-----------+-------------+--------------+ - | tbl1 | 900.000 B | 6 | - | tbl2 | 500.000 B | 3 | - | Total | 1.400 KB | 9 | - | Quota | 1024.000 GB | 1073741824 | - | Left | 1021.921 GB | 1073741815 | - +-----------+-------------+--------------+ - ``` - -2. Display the subdivision data volume, replica count and number of rows of the specified table below the specified DB. - - ``` - SHOW DATA FROM example_db.test; - ``` - ``` - +-----------+-----------+-----------+--------------+----------+ - | TableName | IndexName | Size | ReplicaCount | RowCount | - +-----------+-----------+-----------+--------------+----------+ - | test | r1 | 10.000MB | 30 | 10000 | - | | r2 | 20.000MB | 30 | 20000 | - | | test2 | 50.000MB | 30 | 50000 | - | | Total | 80.000 | 90 | | - +-----------+-----------+-----------+--------------+----------+ - ``` -3. Can be combined and sorted according to the data volume, replica count,and number of rows,etc. - - ``` - SHOW DATA ORDER BY ReplicaCount desc,Size asc; - ``` - - ``` - +-----------+-------------+--------------+ - | TableName | Size | ReplicaCount | - +-----------+-------------+--------------+ - | table_c | 3.102 KB | 40 | - | table_d | .000 | 20 | - | table_b | 324.000 B | 20 | - | table_a | 1.266 KB | 10 | - | Total | 4.684 KB | 90 | - | Quota | 1024.000 GB | 1073741824 | - | Left | 1024.000 GB | 1073741734 | - +-----------+-------------+--------------+ - ``` - -## keyword - - SHOW,DATA diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md deleted file mode 100644 index 87989ba692..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW DATABASE", - "language": "en" -} ---- - - - -# SHOW DATABASE ID -## Description -This statement is used to display database name according to database id (for administrators only) -Grammar: -SHOW DATABASE [database_id] - -## example -1. 
Display database name according to database id -SHOW DATABASE 1001; - -## keyword -SHOW,DATABASE diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md deleted file mode 100644 index b9ebd36273..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -{ - "title": "SHOW DATABASES", - "language": "en" -} ---- - - - -# SHOW DATABASES -## Description -This statement is used to show the currently visible DB -Grammar: -SHOW DATABASES; - -## keyword -SHOW,DATABASES - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md deleted file mode 100644 index 2e397d5223..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "SHOW DELETE", - "language": "en" -} ---- - - - -# SHOW DELETE -## Description -This statement is used to show successful historical delete tasks performed -Grammar: -SHOW DELETE [FROM db_name] - -## example -1. Show all historical delete tasks for database -SHOW DELETE FROM database; - -## keyword -SHOW,DELETE - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md deleted file mode 100644 index ef1900db3c..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -{ - "title": "SHOW DYNAMIC PARTITION TABLES", - "language": "en" -} ---- - - - - # SHOW DYNAMIC PARTITION TABLES -## description - This statement is used to display all dynamically partitioned table states under the current db - Grammar: - SHOW DYNAMIC PARTITION TABLES [FROM db_name]; - - ## example - 1. Displays all dynamically partitioned table states for the database - SHOW DYNAMIC PARTITION TABLES FROM database; - - ## keyword - SHOW,DYNAMIC,PARTITION,TABLES diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md deleted file mode 100644 index aba5fbd983..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -{ - "title": "SHOW EXPORT", - "language": "en" -} ---- - - - -# SHOW EXPORT -## Description -This statement is used to show the execution of the specified export task -Grammar: - SHOW EXPORT - [FROM db_name] - [ - WHERE - [ID = your_job_id] - [STATE = ["PENDING"|"EXPORTING"|"FINISHED"|"CANCELLED"]] - [LABEL [ = "your_label" | LIKE "label_matcher"]] - ] - [ORDER BY ...] - [LIMIT limit]; - -Explain: -1) If db_name is not specified, use the current default DB -2) If STATE is specified, the EXPORT state is matched -3) Any column combination can be sorted using ORDER BY -4) If LIMIT is specified, the limit bar matching record is displayed. Otherwise, all of them will be displayed. - -## example -1. Show all export tasks of default DB - SHOW EXPORT; - -2. Show the export tasks of the specified db, sorted in descending order by StartTime - SHOW EXPORT FROM example_db ORDER BY StartTime DESC; - -3. Show the export task of the specified db, state is "exporting" and sorted in descending order by StartTime - SHOW EXPORT FROM example_db WHERE STATE = "exporting" ORDER BY StartTime DESC; - -4. 
Show the export task of specifying DB and job_id - SHOW EXPORT FROM example_db WHERE ID = job_id; - -5. Show the export task of specifying DB and label - SHOW EXPORT FROM example_db WHERE LABEL = "mylabel"; - -6. Show the export task of specifying DB and label prefix is "labelprefix" - SHOW EXPORT FROM example_db WHERE LABEL LIKE "labelprefix%"; - -## keyword - - SHOW,EXPORT - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md deleted file mode 100644 index e5ef14acea..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -{ - "title": "SHOW LOAD", - "language": "en" -} ---- - - - -# SHOW LOAD -## Description -This statement is used to show the execution of the specified import task -Grammar: -SHOW LOAD -[FROM both names] -[ -WHERE -[LABEL [ = "your_label" | LIKE "label_matcher"]] -[STATE = ["PENDING"|"ETL"|"LOADING"|"FINISHED"|"CANCELLED"|]] -] -[ORDER BY ...] -[LIMIT limit][OFFSET offset]; - -Explain: -1) If db_name is not specified, use the current default DB -2) If you use LABEL LIKE, the label that matches the import task contains the import task of label_matcher -3) If LABEL = is used, the specified label is matched accurately. -4) If STATE is specified, the LOAD state is matched -5) Arbitrary column combinations can be sorted using ORDER BY -6) If LIMIT is specified, the limit bar matching record is displayed. Otherwise, all of them will be displayed. -7) If OFFSET is specified, the query results are displayed from offset. By default, the offset is 0. -8) If broker/mini load is used, the connection in the URL column can be viewed using the following command: - -SHOW LOAD WARNINGS ON 'url' - -## example -1. Show all import tasks of default DB -SHOW LOAD; - -2. Show the import task of the specified db. The label contains the string "2014_01_02", showing the oldest 10 -SHOW LOAD FROM example_db WHERE LABEL LIKE "2014_01_02" LIMIT 10; - -3. Show the import task of the specified db, specify label as "load_example_db_20140102" and sort it in descending order by LoadStartTime -SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" ORDER BY LoadStartTime DESC; - -4. Show the import task of the specified db, specify label as "load_example_db_20140102" and state as "load", and sort it in descending order by LoadStartTime -SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" AND STATE = "loading" ORDER BY LoadStartTime DESC; - -5. Show the import task of the specified dB and sort it in descending order by LoadStartTime, and display 10 query results starting with offset 5 -SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 5,10; -SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 10 offset 5; - -6. 
Small batch import is a command to view the import status -curl --location-trusted -u {user}:{passwd} http://{hostname}:{port}/api/{database}/_load_info?label={labelname} - -## keyword -SHOW,LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md deleted file mode 100644 index e443da46a3..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW PARTITION", - "language": "en" -} ---- - - - -# SHOW PARTITION ID -## Description -This statement is used to display database name, table name, partition name according to partition id (for administrators only) -Grammar: -SHOW PARTITION [partition_id] - -## example -1. Display database name, table name, partition name according to partition id -SHOW PARTITION 10002; - -## keyword -SHOW,PARTITION diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md deleted file mode 100644 index 0a75f56116..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -{ - "title": "SHOW PARTITIONS", - "language": "en" -} ---- - - - -# SHOW PARTITIONS -## Description -This statement is used to display partition information -Grammar: -SHOW PARTITIONS FROM [db_name.]table_name [WHERE] [ORDER BY] [LIMIT]; -Explain: -Support filter with following columns: PartitionId,PartitionName,State,Buckets,ReplicationNum, -LastConsistencyCheckTime - -## example -1. Display partition information for the specified table below the specified DB -SHOW PARTITIONS FROM example_db.table_name; - -2. Display information about the specified partition of the specified table below the specified DB -SHOW PARTITIONS FROM example_db.table_name WHERE PartitionName = "p1"; - -3. Display information about the newest partition of the specified table below the specified DB -SHOW PARTITIONS FROM example_db.table_name ORDER BY PartitionId DESC LIMIT 1; - -## keyword -SHOW,PARTITIONS - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md deleted file mode 100644 index 3c55e9f6c2..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -{ - "title": "SHOW PROPERTY", - "language": "en" -} ---- - - - -# SHOW PROPERTY -## Description -This statement is used to view user attributes -Grammar: -SHOW PROPERTY [FOR user] [LIKE key] - -## example -1. View the attributes of the jack user -SHOW PROPERTY FOR 'jack' - -2. View Jack user import cluster related properties -SHOW PROPERTY FOR 'jack' LIKE '%load_cluster%' - -## keyword -SHOW, PROPERTY - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md deleted file mode 100644 index 24c1197ea6..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "SHOW REPOSITORIES", - "language": "en" -} ---- - - - -# SHOW REPOSITORIES -## Description -This statement is used to view the currently created warehouse. -Grammar: -SHOW REPOSITORIES; - -Explain: -1. 
Each column has the following meanings: -RepoId: Unique repository ID -RepoName: Repository name -CreateTime: The time the repository was first created -IsReadOnly: Whether the repository is read-only -Location: The root directory in the repository used for backing up data -Broker: The broker the repository depends on -ErrMsg: Palo regularly checks the connectivity of the repository; if a problem occurs, the error message is displayed here. - -## example -1. View the repositories that have been created: -SHOW REPOSITORIES; - -## keyword -SHOW, REPOSITORY, REPOSITORIES - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md deleted file mode 100644 index 13c248a976..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -{ - "title": "SHOW RESTORE", - "language": "en" -} ---- - - - -# SHOW RESTORE -## Description -This statement is used to view RESTORE tasks. -Grammar: -SHOW RESTORE [FROM db_name] - -Explain: -1. Only the last RESTORE task is saved in Palo. -2. Each column has the following meanings: -JobId: Unique job ID -Label: The name of the backup to be restored -Timestamp: The time version of the backup to be restored -DbName: The database the job belongs to -State: Current phase -PENDING: The initial state after submitting a job -SNAPSHOTTING: Executing the snapshot -DOWNLOAD: The snapshot is complete, ready to download the snapshot from the repository -DOWNLOADING: Downloading the snapshot -COMMIT: Snapshot download completed, ready to take effect -COMMITTING: Taking effect -FINISHED: The job succeeded -CANCELLED: The job failed -AllowLoad: Whether loading is allowed during recovery (currently not supported) -ReplicationNum: The number of replicas specified for recovery -Restore Jobs: Tables and partitions to be restored -CreateTime: Task submission time -MetaPreparedTime: Metadata preparation completion time -Snapshot Finished Time: Snapshot completion time -Download Finished Time: Snapshot download completion time -FinishedTime: Job end time -Unfinished Tasks: The unfinished sub-task IDs are displayed in the SNAPSHOTTING, DOWNLOADING, and COMMITTING phases -Status: Displays the failure message if the job fails -Timeout: Job timeout, in seconds - -## example -1. Check the last RESTORE task under example_db. -SHOW RESTORE FROM example_db; - -## keyword -SHOW, RESTORE - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md deleted file mode 100644 index e44d3c9246..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -{ - "title": "SHOW ROUTINE LOAD TASK", - "language": "en" -} ---- - - - -# SHOW ROUTINE LOAD TASK -## example - -1. Show sub-task information for a routine load job named test1.
- -SHOW ROUTINE LOAD TASK WHERE JobName = "test1"; - -## keyword -SHOW,ROUTINE,LOAD,TASK diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md deleted file mode 100644 index 66ffbd66b7..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -{ - "title": "SHOW ROUTINE LOAD", - "language": "en" -} ---- - - - -# SHOW ROUTINE LOAD -## description - This statement is used to show the running status of the Routine Load job - grammar: - SHOW [ALL] ROUTINE LOAD [FOR jobName] [LIKE pattern]; - - Result description: - - Id: Job ID - Name: job name - CreateTime: Job creation time - PauseTime: Last job pause time - EndTime: The end time of the job - DbName: corresponding database name - TableName: Corresponding table name - State: job running status - DataSourceType: Data source type: KAFKA - CurrentTaskNum: current number of subtasks - JobProperties: Job configuration details -DataSourceProperties: Data source configuration details - CustomProperties: custom configuration - Statistic: job running status statistics - Progress: Job running progress - Lag: job delay status -ReasonOfStateChanged: Reason of job status change - ErrorLogUrls: The viewing address of the filtered data with unqualified quality - OtherMsg: Other error messages - - * State - - There are the following 4 states: - - * NEED_SCHEDULE: The job is waiting to be scheduled - * RUNNING: The job is running - * PAUSED: The job is suspended - * STOPPED: The job has ended - * CANCELLED: The job has been cancelled - - * Progress - - For Kafka data sources, the offset currently consumed by each partition is displayed. For example, {"0":"2"} means that the consumption progress of Kafka partition 0 is 2. - - * Lag - - For Kafka data sources, the consumption delay of each partition is displayed. For example, {"0":10} means that the consumption delay of Kafka partition 0 is 10. - -## example - -1. Show all routine import jobs named test 1 (including stopped or cancelled jobs). The result is one or more lines. - -SHOW ALL ROUTINE LOAD FOR test1; - -2. Show the current running routine load job named test1 - -SHOW ROUTINE LOAD FOR test1; - -3. Display all routine import jobs (including stopped or cancelled jobs) under example_db. The result is one or more lines. - -use example_db; -SHOW ALL ROUTINE LOAD; - -4. Display all running routine import jobs under example_db - -use example_db; -SHOW ROUTINE LOAD; - -5. Display the current running routine import job named test1 under example_db - -SHOW ROUTINE LOAD FOR example_db.test1; - -6. Display all routine import jobs named test1 (including stopped or cancelled jobs) under example_db. The result is one or more lines. - -SHOW ALL ROUTINE LOAD FOR example_db.test1; - -7. Show the current running routine load jobs under example_db with name match test1 - -use example_db; -SHOW ROUTINE LOAD LIKE "%test1%"; - -## keyword -SHOW,ROUTINE,LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md deleted file mode 100644 index 84ae5373dc..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -{ - "title": "SHOW SNAPSHOT", - "language": "en" -} ---- - - - -# SHOW SNAPSHOT -## Description -This statement is used to view existing backups in the warehouse. 
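As a quick illustration before the formal grammar (the repository name `example_repo` and snapshot name `backup1` are the same placeholders used in the examples further down), a minimal invocation and a filtered one look like:

```
SHOW SNAPSHOT ON example_repo;
SHOW SNAPSHOT ON example_repo WHERE SNAPSHOT = "backup1";
```

The examples below walk through these forms, including filtering on a specific backup timestamp.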
-Grammar: -SHOW SNAPSHOT ON `repo_name` -[WHERE SNAPSHOT = "snapshot" [AND TIMESTAMP = "backup_timestamp"]]; - -Explain: -1. Each column has the following meanings: -Snapshot: The name of the backup -Timestamp: Time version for backup -Status: If the backup is normal, the OK will be displayed, otherwise the error message will be displayed. - -2. If TIMESTAMP is specified, the following additional information will be displayed: -Database: The name of the database where the backup data belongs -Details: Shows the entire backup data directory and file structure in the form of Json - -'35;'35; example -1. Check the existing backups in warehouse example_repo: -SHOW SNAPSHOT ON example_repo; - -2. View only the backup named backup1 in warehouse example_repo: -SHOW SNAPSHOT ON example_repo WHERE SNAPSHOT = "backup1"; - -2. Check the backup named backup1 in the warehouse example_repo for details of the time version "2018-05-05-15-34-26": -SHOW SNAPSHOT ON example_repo -WHERE SNAPSHOT = "backup1" AND TIMESTAMP = "2018-05-05-15-34-26"; - -## keyword -SHOW, SNAPSHOT diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md deleted file mode 100644 index b2704e3ad7..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -{ - "title": "SHOW STREAM LOAD", - "language": "en" -} ---- - - - -# SHOW STREAM LOAD -## Description -This statement is used to show the execution of the specified import task -Grammar: -SHOW STREAM LOAD -[FROM both names] -[ -WHERE -[LABEL [ = "your_label" | LIKE "label_matcher"]] -[STATUS = ["SUCCESS"|"FAIL"|]] -] -[ORDER BY ...] -[LIMIT limit][OFFSET offset]; - -Explain: -1) If db_name is not specified, use the current default DB -2) If you use LABEL LIKE, the label that matches the task contains the STREAM LOAD task of label_matcher -3) If LABEL = is used, the specified label is matched accurately. -4) If STATUS is specified, the STREAM LOAD status is matched -5) Arbitrary column combinations can be sorted using ORDER BY -6) If LIMIT is specified, the limit bar matching record is displayed. Otherwise, all of them will be displayed. -7) If OFFSET is specified, the query results are displayed from offset. By default, the offset is 0. - -## example -1. Show all STREAM LOAD tasks of default DB -SHOW STREAM LOAD; - -2. Show the STREAM LOAD task of the specified db. The label contains the string "2014_01_02", showing the oldest 10 -SHOW STREAM LOAD FROM example_db WHERE LABEL LIKE "2014_01_02" LIMIT 10; - -3. Show the STREAM LOAD task of the specified db, specify label as "load_example_db_20140102" -SHOW STREAM LOAD FROM example_db WHERE LABEL = "load_example_db_20140102"; - -4. Show the STREAM LOAD task of the specified db, specify status as "success", and sort it in descending order by StartTime -SHOW STREAM LOAD FROM example_db WHERE STATUS = "success" ORDER BY StartTime DESC; - -5. 
Show the STREAM LOAD task of the specified dB and sort it in descending order by StartTime, and display 10 query results starting with offset 5 -SHOW STREAM LOAD FROM example_db ORDER BY StartTime DESC limit 5,10; -SHOW STREAM LOAD FROM example_db ORDER BY StartTime DESC limit 10 offset 5; - -## keyword -SHOW,STREAM LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md deleted file mode 100644 index f496daffa1..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "SHOW SYNC JOB", - "language": "en" -} ---- - - - -# SHOW SYNC JOB - -## description - -This command is used to display the resident data synchronization job status in all databases. - -Syntax: - - SHOW SYNC JOB [FROM db_name] - -## example - -1. Show the status of all SyncJob in the current database. - - SHOW SYNC JOB; - -2. Show status of all SyncJob under databases `test_db`. - - SHOW SYNC JOB FROM `test_db`; - -## keyword - - SHOW,SYNC,JOB,BINLOG \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md deleted file mode 100644 index 08a730bc7a..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -{ - "title": "SHOW TABLE CREATION", - "language": "en" -} ---- - - - -# SHOW TABLE CREATION - -## Description - - This statement is used to show the execution of the specified Iceberg Database table creation task - Syntax. - SHOW TABLE CREATION [FROM db_name] [LIKE table_name_wild]; - - Description. - 1. Usage Notes - 1) If db_name is not specified, the current default db is used. - 2) If you use LIKE, it will match the table creation task with table_name_wild in the table name - 2. The meaning of each column - 1) Database: the name of the database - 2) Table: the name of the table to be created - 3) Status: the creation status of the table, `success`/`fail`. - 4) CreateTime: the time to perform the task of creating the table - 5) Error Msg: Error message of the failed table creation, or empty if it succeeds. - -## example - - 1. Show all the table creation tasks in the default Iceberg db - SHOW TABLE CREATION; - - mysql> show table creation; - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - | Database | Table | Status | Create Time | Error Msg | - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - | default_cluster:iceberg_db | logs_1 | success | 2022-01-24 19:42:45 | | - | default_cluster:iceberg_db | logs | fail | 2022-01-24 19:42:45 | Cannot convert Iceberg type[list] to Doris type. | - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - - 2. 
Show the table creation tasks in the specified Iceberg db - SHOW TABLE CREATION FROM example_db; - - mysql> show table creation from iceberg_db; - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - | Database | Table | Status | Create Time | Error Msg | - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - | default_cluster:iceberg_db | logs_1 | success | 2022-01-24 19:42:45 | | - | default_cluster:iceberg_db | logs | fail | 2022-01-24 19:42:45 | Cannot convert Iceberg type[list] to Doris type. | - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - - 3. Show table creation tasks for the specified Iceberg db with the string "log" in the table name - SHOW TABLE CREATION FROM example_db LIKE '%log%'; - - mysql> show table creation from iceberg_db like "%1"; - +----------------------------+--------+---------+---------------------+-----------+ - | Database | Table | Status | Create Time | Error Msg | - +----------------------------+--------+---------+---------------------+-----------+ - | default_cluster:iceberg_db | logs_1 | success | 2022-01-24 19:42:45 | | - +----------------------------+--------+---------+---------------------+-----------+ - -## keyword - - SHOW,TABLE CREATION diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md deleted file mode 100644 index f5268730fa..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW TABLE", - "language": "en" -} ---- - - - -# SHOW TABLE ID -## Description -This statement is used to display database name, table name according to table id (for administrators only) -Grammar: -SHOW TABLE [table_id] - -## example -1. 
Display database name, table name according to table id -SHOW TABLE 10001; - -## keyword -SHOW,TABLE diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md deleted file mode 100644 index e1a5d3a627..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -{ - "title": "SHOW TABLES", - "language": "en" -} ---- - - - -# SHOW TABLES -## Description -This statement is used to show all tables under the current DB -Grammar: -SHOW TABLES; - -## keyword -SHOW,TABLES diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md deleted file mode 100644 index 21ccbccd0a..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "SHOW TABLET", - "language": "en" -} ---- - - - -# SHOW TABLET -## Description - This statement is used to display information of the specified tablet (for administrators only) - Grammar: - SHOW TABLET tablet_id - -## example - - // Display parent level ID information of tablet with specified tablet id of 10000 - SHOW TABLET 10000; - -## keyword -SHOW,TABLET diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md deleted file mode 100644 index 0ed338d5af..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -{ - "title": "SHOW TABLETS", - "language": "en" -} ---- - - - -# SHOW TABLETS -## Description - This statement is used to display tablet-related information (for administrators only) - Grammar: - SHOW TABLETS - [FROM [db_name.]table_name] [partiton(partition_name_1, partition_name_1)] - [where [version=1] [and backendid=10000] [and state="NORMAL|ROLLUP|CLONE|DECOMMISSION"]] - [order by order_column] - [limit [offset,]size] - -## example - // Display all tablets information in the specified table below the specified DB - SHOW TABLETS FROM example_db.table_name; - - SHOW TABLETS FROM example_db.table_name partition(p1, p2); - - // display 10 tablets information in the table - SHOW TABLETS FROM example_db.table_name limit 10; - - SHOW TABLETS FROM example_db.table_name limit 5,10; - - // display the tablets that fulfill some conditions - SHOW TABLETS FROM example_db.table_name where backendid=10000 and version=1 and state="NORMAL"; - - SHOW TABLETS FROM example_db.table_name where backendid=10000 order by version; - - SHOW TABLETS FROM example_db.table_name where indexname="t1_rollup"; - -## keyword - SHOW,TABLETS,LIMIT diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md deleted file mode 100644 index b88322dd39..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md +++ /dev/null @@ -1,100 +0,0 @@ ---- -{ - "title": "SHOW TRANSACTION", - "language": "en" -} ---- - - - -# SHOW TRANSACTION -## description - -This syntax is used to view transaction details for the specified transaction id, label name or transaction status. 
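As a quick illustration before the formal grammar (4005 and `label_name` are the same placeholder values used in the examples at the end of this page), the statement can filter on any one of those three attributes:

```
SHOW TRANSACTION WHERE ID = 4005;
SHOW TRANSACTION WHERE LABEL = 'label_name';
SHOW TRANSACTION WHERE STATUS = 'visible';
```

The examples below also show the FROM db_name form for querying a specific database.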
- -grammar: - -``` -SHOW TRANSACTION -[FROM db_name] -WHERE -[id = transaction_id] -[label = label_name] -[status = transaction_status]; -``` - -Example return result: - -``` -     TransactionId: 4005 -             Label: insert_8d807d5d-bcdd-46eb-be6d-3fa87aa4952d -       Coordinator: FE: 10.74.167.16 - TransactionStatus: VISIBLE - LoadJobSourceType: INSERT_STREAMING -       PrepareTime: 2020-01-09 14:59:07 -     PreCommitTime: 2020-01-09 14:59:07 -        CommitTime: 2020-01-09 14:59:09 -        FinishTime: 2020-01-09 14:59:09 -            Reason: -ErrorReplicasCount: 0 -        ListenerId: -1 -         TimeoutMs: 300000 -``` - -* TransactionId: transaction id -* Label: the label of the corresponding load job -* Coordinator: the node responsible for transaction coordination -* TransactionStatus: transaction status -    * PREPARE: preparation stage -    * PRECOMMITTED: The transaction was precommitted -    * COMMITTED: The transaction was successful, but the data is not visible -    * VISIBLE: The transaction was successful and the data is visible -    * ABORTED: transaction failed -* LoadJobSourceType: The type of the load job. -* PrepareTime: transaction start time -* PreCommitTime: the time when the transaction was precommitted -* CommitTime: the time when the transaction was successfully committed -* FinishTime: The time when the data is visible -* Reason: error message -* ErrorReplicasCount: Number of replicas with errors -* ListenerId: the id of the related load job -* TimeoutMs: transaction timeout time in milliseconds - -## example - -1. View the transaction with id 4005: - - SHOW TRANSACTION WHERE ID = 4005; - -2. Specify the db and view the transaction with id 4005: - - SHOW TRANSACTION FROM db WHERE ID = 4005; - -3. View the transaction with label `label_name`: - - SHOW TRANSACTION WHERE LABEL = 'label_name'; - -4. View the transactions with status `visible`: - - SHOW TRANSACTION WHERE STATUS = 'visible'; - -## keyword - - SHOW, TRANSACTION \ No newline at end of file diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md b/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md deleted file mode 100644 index 5ea8da2db1..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -{ - "title": "SHOW LAST INSERT", - "language": "en" -} ---- - - - -# SHOW LAST INSERT -## description - -This syntax is used to view the result of the last insert operation in the current session connection. - -Syntax: - -``` -SHOW LAST INSERT -``` - -Example of return result. - -``` - TransactionId: 64067 - Label: insert_ba8f33aea9544866-8ed77e2844d0cc9b - Database: default_cluster:db1 - Table: t1 -TransactionStatus: VISIBLE - LoadedRows: 2 - FilteredRows: 0 -``` - -* TransactionId: transaction id -* Label: label corresponding to the insert task -* Database: the database corresponding to the insert -* Table: the table corresponding to the insert -* TransactionStatus: the status of the transaction - * PREPARE: preparation phase - * PRECOMMITTED: Pre-committed stage - * COMMITTED: The transaction is successful, but the data is not visible - * VISIBLE: The transaction is successful and the data is visible - * ABORTED: The transaction failed. 
-* LoadedRows: Number of rows imported -* FilteredRows: number of rows filtered - -## example - -## keyword - - SHOW, LAST, INSERT diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md deleted file mode 100644 index 487aaaed4c..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -{ - "title": "STOP ROUTINE LOAD", - "language": "en" -} ---- - - - -# STOP ROUTINE LOAD -## example - -1. Stop the routine import job named test 1. - -STOP ROUTINE LOAD FOR test1; - -## keyword -STOP,ROUTINE,LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md b/docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md deleted file mode 100644 index b9fc262d0c..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "STOP SYNC JOB", - "language": "en" -} ---- - - - -# STOP SYNC JOB - -## description - -Stops a uncancelled SyncJob in the database. - -Syntax: - - STOP SYNC JOB [db.]job_name - -## example - -1. Stop the SyncJob named `job_name` - - STOP SYNC JOB `job_name`; - -## keyword - STOP,SYNC,JOB,BINLOG diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md deleted file mode 100644 index f2c90473ad..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md +++ /dev/null @@ -1,286 +0,0 @@ ---- -{ - "title": "STREAM LOAD", - "language": "en" -} ---- - - - -# STREAM LOAD -## description - -NAME - -load data to table in streaming - -SYNOPSIS - -Curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load - -DESCRIPTION - -This statement is used to load data to the specified table. The difference from normal load is that this load method is synchronous load. - -This type of load still guarantees the atomicity of a batch of load tasks, either all data is loaded successfully or all fails. - -This operation also updates the data for the rollup table associated with this base table. - -This is a synchronous operation that returns the results to the user after the entire data load is completed. - -Currently, HTTP chunked and non-chunked uploads are supported. For non-chunked mode, Content-Length must be used to indicate the length of the uploaded content, which ensures data integrity. - -In addition, the user preferably sets the Content of the Expect Header field to 100-continue, which avoids unnecessary data transmission in certain error scenarios. - -OPTIONS - -Users can pass in the load parameters through the Header part of HTTP. - -`label` - -A label that is loaded at one time. The data of the same label cannot be loaded multiple times. Users can avoid the problem of repeated data load by specifying the label. - -Currently Palo internally retains the most recent successful label within 30 minutes. - -`column_separator` - -Used to specify the column separator in the load file. The default is `\t`. If it is an invisible character, you need to add `\x` as a prefix and hexadecimal to indicate the separator. - -For example, the separator `\x01` of the hive file needs to be specified as `-H "column_separator:\x01"`. - -You can use a combination of multiple characters as the column separator. 
- 
-`line_delimiter`
- 
-Used to specify the line delimiter in the load file. The default is `\n`. You can use a combination of multiple characters as the line delimiter.
- 
-`columns`
- 
-Used to specify the mapping between the columns in the load file and the columns in the table. If the columns in the source file correspond exactly to the table schema, this field does not need to be specified. If the source file does not match the table schema, this field is required to perform the data conversion. Columns can be given in two forms: one maps a field in the load file directly, using the field name; the other is a derived column, with the syntax `column_name = expression`. A few examples may help:
- 
-Example 1: The table has three columns "c1, c2, c3", and the three columns in the source file correspond to "c3, c2, c1" in turn; then you need to specify `-H "columns: c3, c2, c1"`.
- 
-Example 2: The table has three columns "c1, c2, c3", and the first three columns in the source file correspond to them in turn, but the file contains one extra column; then you need to specify `-H "columns: c1, c2, c3, xxx"`, where the last entry is an arbitrary placeholder name for the extra column.
- 
-Example 3: The table has three columns "year, month, day", and the source file contains only one time column in the "2018-06-01 01:02:03" format. Then you can specify `-H "columns: col, year = year(col), month=month(col), day=day(col)"` to complete the load. (A full command sketch for this derived-column mapping is given after the `strip_outer_array` option below.)
- 
-`where`
- 
-Used to filter the loaded data. If the user needs to filter out unwanted rows, this can be achieved by setting this option.
- 
-Example 1: to load only rows whose k1 column equals 20180601, specify -H "where: k1 = 20180601" when loading.
- 
-`max_filter_ratio`
- 
-The maximum proportion of rows that may be filtered out (because of irregular data and similar reasons). The default is zero tolerance. Rows filtered out by the where condition are not counted against this ratio.
- 
-`partitions`
- 
-Used to specify the partitions targeted by this load. If the user can determine which partitions the data belongs to, it is recommended to specify them. Data that does not fall into these partitions will be filtered out.
- 
-For example, to load into partitions p1 and p2: `-H "partitions: p1, p2"`.
- 
-`timeout`
- 
-Specifies the timeout of the load, in seconds. The default is 600 seconds. The allowed range is 1 to 259200 seconds.
- 
-`strict_mode`
- 
-Specifies whether strict mode is enabled for this load. It is disabled by default. Enable it with `-H "strict_mode: true"`.
- 
-`timezone`
- 
-Specifies the time zone used for this load. The default is GMT+8. This parameter affects the results of all time-zone-related functions involved in the load.
- 
-`exec_mem_limit`
- 
-Memory limit of the load. The default is 2GB. The unit is bytes.
- 
-`format`
-Specifies the format of the loaded data. Supported values are csv, csv_with_names, csv_with_names_and_types and json; the default is csv.
- 
-`jsonpaths`
-There are two ways to load JSON data: simple mode and matched mode. If jsonpaths is set, the load runs in matched mode; otherwise it runs in simple mode. Please refer to the examples for details.
- 
-`strip_outer_array`
-Boolean type. true indicates that the JSON data starts with an array and that the objects in the array are flattened into rows; the default value is false.
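- 
-As referenced under `columns` above, a full command for the derived-column mapping in Example 3 could look like the following minimal sketch; the host, port, credentials, label, file, database and table names are placeholders, not values from this document:
- 
-```
-# The single time column in the file is mapped to the table's year/month/day columns
-# using the derived-column form of the `columns` header.
-# All names below (user, passwd, fe_host, http_port, example_db, example_tbl) are placeholders.
-curl --location-trusted -u user:passwd \
-    -H "label:derived_columns_demo" \
-    -H "columns: col, year=year(col), month=month(col), day=day(col)" \
-    -T data.file \
-    -XPUT http://fe_host:http_port/api/example_db/example_tbl/_stream_load
-```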
- 
-`json_root`
-json_root is a valid JSONPATH string that specifies the root node of the JSON document. The default value is "".
- 
-`merge_type`
- 
-The data merge type. Three types are supported: APPEND, DELETE, and MERGE. APPEND is the default and means that this batch of data is appended to the existing data. DELETE means that all rows whose keys match this batch of data are deleted. MERGE must be used together with the delete condition: rows that meet the delete condition are handled with DELETE semantics and the remaining rows with APPEND semantics.
- 
-`fuzzy_parse` Boolean type. true indicates that the JSON schema is parsed from the first row and reused for the following rows, which makes the load faster but requires every row to keep the same key order as the first row. The default value is false. Only used for the json format.
- 
-`num_as_string` Boolean type. true means that numeric values are parsed as strings when the JSON data is parsed, so they are loaded without loss of precision.
- 
-`read_json_by_line`: Boolean type. true means that one JSON object is read per line; the default value is false.
- 
-`send_batch_parallelism`: Integer type, used to set the parallelism for sending batches. If the value exceeds `max_send_batch_parallelism_per_job` in the BE configuration, the coordinator BE uses the value of `max_send_batch_parallelism_per_job` instead.
- 
-`load_to_single_tablet`: Boolean type. true means that one task loads data into only one tablet of the corresponding partition at a time. The default value is false. This parameter can only be set when loading data into an OLAP table with random bucketing.
- 
-RETURN VALUES
- 
-After the load completes, the result of this load is returned in JSON format. It currently includes the following fields:
- 
-* `Status`: the load status.
- 
- * Success: the load succeeded and the data is visible.
- 
- * Publish Timeout: the load job was committed successfully, but for some reason the data is not visible yet. The load can be considered successful and does not need to be retried.
- 
- * Label Already Exists: the label is already used by another job, which either loaded successfully or is still in progress. The user needs to use the get label state command to determine the subsequent operation.
- 
- * Other: the load failed; the user can specify this label to retry the job.
- 
-* Message: a detailed description of the load status. On failure it returns the specific reason for the failure.
- 
-* NumberTotalRows: the total number of rows read from the data stream.
- 
-* NumberLoadedRows: the number of rows loaded this time; only valid when the status is Success.
- 
-* NumberFilteredRows: the number of rows filtered out by this load, that is, rows with unqualified data quality.
- 
-* NumberUnselectedRows: the number of rows filtered out by the where condition for this load.
- 
-* LoadBytes: the amount of source file data loaded this time.
- 
-* LoadTimeMs: the time spent on this load, in milliseconds.
- 
-* BeginTxnTimeMs: the time spent on the RPC to FE to begin a transaction, in milliseconds.
- 
-* StreamLoadPutTimeMs: the time spent on the RPC to FE to get the stream load plan, in milliseconds.
- 
-* ReadDataTimeMs: the time spent reading data, in milliseconds.
- 
-* WriteDataTimeMs: the time spent writing data, in milliseconds.
- 
-* CommitAndPublishTimeMs: the time spent on the RPC to FE to commit and publish the transaction, in milliseconds.
- 
-* ErrorURL: the specific content of the filtered data; only the first 1000 rows are retained.
- 
-ERRORS
- 
-You can view the details of the load errors with the following statement:
- 
- ```SHOW LOAD WARNINGS ON 'url'```
- 
-where url is the URL given by ErrorURL.
- 
-## example
- 
-1. Load the data from the local file 'testData' into the table 'testTbl' in the database 'testDb', use a label for deduplication, and specify a timeout of 100 seconds
- 
- ```curl --location-trusted -u root -H "label:123" -H "timeout:100" -T testData http://host:port/api/testDb/testTbl/_stream_load```
- 
-2. Load the data from the local file 'testData' into the table 'testTbl' in the database 'testDb', use a label for deduplication, and load only rows whose k1 equals 20180601
- 
- ```curl --location-trusted -u root -H "label:123" -H "where: k1=20180601" -T testData http://host:port/api/testDb/testTbl/_stream_load```
- 
-3. Load the data from the local file 'testData' into the table 'testTbl' in the database 'testDb', allowing a 20% error rate (user is in default_cluster)
- 
- ```curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -T testData http://host:port/api/testDb/testTbl/_stream_load```
- 
-4. Load the data from the local file 'testData' into the table 'testTbl' in the database 'testDb', allow a 20% error rate, and specify the column names of the file (user is in default_cluster)
- 
- ```curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -H "columns: k2, k1, v1" -T testData http://host:port/api/testDb/testTbl/_stream_load```
- 
-5. Load the data from the local file 'testData' into the p1 and p2 partitions of the table 'testTbl' in the database 'testDb', allowing a 20% error rate
- 
- ```curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -H "partitions: p1, p2" -T testData http://host:port/api/testDb/testTbl/_stream_load```
- 
-6. Load using streaming mode (user is in default_cluster)
- 
- ```seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load```
- 
-7. Load into a table with HLL columns, which can be generated from columns in the table or from columns in the data; hll_empty can also be used to fill HLL columns that have no corresponding column in the data
- 
- ```curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
- 
-8. Load data with strict mode filtering and set the time zone to Africa/Abidjan
- 
- ```curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load```
- 
-9. Load into a table with BITMAP columns, which can be generated from columns in the table or from a column in the data; bitmap_empty can also be used to fill BITMAP columns that have no corresponding column in the data
- 
- ```curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load```
- 
-10. Load into a table with QUANTILE_STATE columns, which can be generated from columns in the table or from a column in the data; TO_QUANTILE_STATE can also be used to convert numerical data to QUANTILE_STATE.
2048 is an optional parameter representing the precision of the TDigest algorithm, the valid value is [2048, 10000], the larger the value, the higher the precision, default is 2048 - - ```Curl --location-trusted -u root -H "columns: k1, k2, v1, v2, v1=to_quantile_state(v1, 2048)" -T testData http://host:port/api/testDb/testTbl/_stream_load``` - -11. a simple load json - table schema: - `category` varchar(512) NULL COMMENT "", - `author` varchar(512) NULL COMMENT "", - `title` varchar(512) NULL COMMENT "", - `price` double NULL COMMENT "" - json data: - {"category":"C++","author":"avc","title":"C++ primer","price":895} - load command by curl: - curl --location-trusted -u root -H "label:123" -H "format: json" -T testData http://host:port/api/testDb/testTbl/_stream_load - In order to improve throughput, it supports importing multiple pieces of json data at one time. Each row is a json object. The default value for line delimeter is `\n`. Need to set read_json_by_line to true. The json data format is as follows: - {"category":"C++","author":"avc","title":"C++ primer","price":89.5} - {"category":"Java","author":"avc","title":"Effective Java","price":95} - {"category":"Linux","author":"avc","title":"Linux kernel","price":195} - -12. Matched load json by jsonpaths - For example json data: - [ - {"category":"xuxb111","author":"1avc","title":"SayingsoftheCentury","price":895}, - {"category":"xuxb222","author":"2avc","title":"SayingsoftheCentury","price":895}, - {"category":"xuxb333","author":"3avc","title":"SayingsoftheCentury","price":895} - ] - Matched imports are made by specifying jsonpath parameter, such as `category`, `author`, and `price`, for example: - curl --location-trusted -u root -H "columns: category, price, author" -H "label:123" -H "format: json" -H "jsonpaths: [\"$.category\",\"$.price\",\"$.author\"]" -H "strip_outer_array: true" -T testData http://host:port/api/testDb/testTbl/_stream_load - Tips: - 1)If the json data starts as an array and each object in the array is a record, you need to set the strip_outer_array to true to represent the flat array. - 2)If the json data starts with an array, and each object in the array is a record, our ROOT node is actually an object in the array when we set jsonpath. - -13. User specifies the json_root node - For example json data: - { - "RECORDS":[ - {"category":"11","title":"SayingsoftheCentury","price":895,"timestamp":1589191587}, - {"category":"22","author":"2avc","price":895,"timestamp":1589191487}, - {"category":"33","author":"3avc","title":"SayingsoftheCentury","timestamp":1589191387} - ] - } - Matched imports are made by specifying jsonpath parameter, such as `category`, `author`, and `price`, for example: - curl --location-trusted -u root -H "columns: category, price, author" -H "label:123" -H "format: json" -H "jsonpaths: [\"$.category\",\"$.price\",\"$.author\"]" -H "strip_outer_array: true" -H "json_root: $.RECORDS" -T testData http://host:port/api/testDb/testTbl/_stream_load - -14. delete all data which key columns match the load data - curl --location-trusted -u root -H "merge_type: DELETE" -T testData http://host:port/api/testDb/testTbl/_stream_load -15. 
delete all data which key columns match the load data where flag is true, others append - curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv, flag" -H "merge_type: MERGE" -H "delete: flag=1" -T testData http://host:port/api/testDb/testTbl/_stream_load - -## keyword - - STREAM, LOAD diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/UPDATE.md b/docs/en/sql-reference/sql-statements/Data Manipulation/UPDATE.md deleted file mode 100644 index d087659e08..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/UPDATE.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -{ - "title": "UPDATE", - "language": "en" -} ---- - - - -# UPDATE -## description -### Syntax - -``` -UPDATE table_name - SET assignment_list - WHERE expression - -value: - {expr | DEFAULT} - -assignment: - col_name = value - -assignment_list: - assignment [, assignment] ... -``` - -### Parameters - -+ table_name: The target table of the data to be updated. Can be in the form of 'db_name.table_name' -+ assignment_list: The target column to be updated. Can be in the form of 'col_name = value, col_name = value' -+ where expression: The condition to be updated is an expression that returns true or false - -### Note - -The current UPDATE statement only supports row updates on the Unique model, and there may be data conflicts caused by concurrent updates. -Currently Doris does not deal with such problems, and users are required to avoid such problems from the business side. - -## example - -The `test` table is a unique model table, which contains four columns: k1, k2, v1, v2. Among them, k1, k2 are keys, v1, v2 are values, and the aggregation method is Replace. - -1. Update the v1 column that satisfies the conditions k1 =1 and k2 = 2 in the'test' table to 1 - -``` -UPDATE test SET v1 = 1 WHERE k1=1 and k2=2; -``` - -2. Increment the v1 column of the column with k1=1 in the'test' table by 1 - -``` -UPDATE test SET v1 = v1+1 WHERE k1=1; -``` - -## keyword - - UPDATE diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md b/docs/en/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md deleted file mode 100644 index bc226975eb..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md +++ /dev/null @@ -1,115 +0,0 @@ ---- -{ - "title": "ALTER ROUTINE LOAD", - "language": "en" -} ---- - - - -# ALTER ROUTINE LOAD -## description - -This syntax is used to modify a routine import job that has been created. - -Only jobs in the PAUSED state can be modified. - -Syntax: - - ALTER ROUTINE LOAD FOR [db.]job_name - [job_properties] - FROM data_source - [data_source_properties] - -1. `[db.]job_name` - - Specify the name of the job to be modified. - -2. `job_properties` - - Specify the job parameters that need to be modified. Currently only supports the modification of the following parameters: - - 1. `desired_concurrent_number` - 2. `max_error_number` - 3. `max_batch_interval` - 4. `max_batch_rows` - 5. `max_batch_size` - 6. `jsonpaths` - 7. `json_root` - 8. `strip_outer_array` - 9. `strict_mode` - 10. `timezone` - 11. `num_as_string` - 12. `fuzzy_parse` - - -3. `data_source` - - The type of data source. Currently supported: - - KAFKA - -4. `data_source_properties` - - The relevant attributes of the data source. Currently only supports: - - 1. `kafka_partitions` - 2. `kafka_offsets` - 3. `kafka_broker_list` - 4. `kafka_topic` - 5. Custom property, such as `property.group.id` - - Notice: - - 1. 
`kafka_partitions` and `kafka_offsets` are used to modify the offset of the kafka partition to be consumed, and can only modify the currently consumed partition. Cannot add partition. - -## example - -1. Modify `desired_concurrent_number` to 1 - - ``` - ALTER ROUTINE LOAD FOR db1.label1 - PROPERTIES - ( - "desired_concurrent_number" = "1" - ); - ``` - -2. Modify `desired_concurrent_number` to 10, modify partition offset, and modify group id. - - ``` - ALTER ROUTINE LOAD FOR db1.label1 - PROPERTIES - ( - "desired_concurrent_number" = "10" - ) - FROM kafka - ( - "kafka_partitions" = "0, 1, 2", - "kafka_offsets" = "100, 200, 100", - "property.group.id" = "new_group" - ); - ``` - - -## keyword - - ALTER,ROUTINE,LOAD - diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/insert.md b/docs/en/sql-reference/sql-statements/Data Manipulation/insert.md deleted file mode 100644 index 2f4d06768d..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/insert.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -{ - "title": "INSERT", - "language": "en" -} ---- - - - -# INSERT -## Description -### Syntax - -``` -INSERT INTO table_name -[ PARTITION (p1, ...)] -[ WITH LABEL label] -[ (column [, ...]) ] -[ [ hint [, ...] ] ] -{ VALUES ( { expression | DEFAULT } [, ...] ) [, ...] | query } -``` - -### Parameters - -> tablet_name: Target table for loading data. It can be in the form of `db_name.table_name`. -> -> partitions: Specifies the partitions to be loaded, with multiple partition names separated by commas. The partitions must exist in `table_name`, -> -> label: Specifies a label for Insert job. -> -> column_name: The specified destination columns must be columns that exists in `table_name`. -> -> expression: The corresponding expression that needs to be assigned to a column. -> -> DEFAULT: Let the corresponding columns use default values -> -> query: A common query whose results are written to the target -> -> hint: Indicators used to indicate `INSERT` execution. ` Both streaming `and default non `streaming'methods use synchronization to complete `INSERT' statement execution -> The non `streaming'mode returns a label after execution to facilitate users to query the imported status through `SHOW LOAD'. - -### Note - -When the `INSERT'statement is currently executed, the default behavior for data that does not conform to the target table is filtering, such as string length. However, for business scenarios where data is not filtered, the session variable `enable_insert_strict'can be set to `true' to ensure that `INSERT` will not be successfully executed when data is filtered out. - -## example - -` The test `table contains two columns `c1', `c2'. - -1. Import a row of data into the `test` table - -``` -INSERT INTO test VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, DEFAULT); -INSERT INTO test (c1) VALUES (1); -``` - -The first and second sentences have the same effect. When the target column is not specified, the column order in the table is used as the default target column. -The third and fourth statements express the same meaning, using the default value of `c2'column to complete data import. - -2. 
Import multiline data into the `test` table at one time - -``` -INSERT INTO test VALUES (1, 2), (3, 2 + 2) -INSERT INTO test (c1, c2) VALUES (1, 2), (3, 2 * 2) -INSERT INTO test (c1) VALUES (1), (3) -Insert in test (C1, C2) values (1, Default), (3, Default) -``` - -The effect of the first and second statements is the same, and two data are imported into the `test'table at one time. -The effect of the third and fourth statements is known, using the default value of the `c2'column to import two data into the `test' table. - - -3. Insert into table `test` with a query stmt. - -``` -INSERT INTO test SELECT * FROM test2 -INSERT INTO test (c1, c2) SELECT * from test2 -``` - -4. Insert into table `test` with specified partition and label - -``` -INSERT INTO test PARTITION(p1, p2) WITH LABEL `label1` SELECT * FROM test2; -INSERT INTO test WITH LABEL `label1` (c1, c2) SELECT * from test2; -``` - -Asynchronous imports are, in fact, encapsulated asynchronously by a synchronous import. Filling in streaming is as efficient as not filling in * execution. - -Since Doris used to import asynchronously, in order to be compatible with the old usage habits, the `INSERT'statement without streaming will still return a label. Users need to view the status of the `label' import job through the `SHOW LOAD command. -## keyword -INSERT diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md b/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md deleted file mode 100644 index 33cb1d513b..0000000000 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/lateral-view.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -{ - "title": "Lateral View", - "language": "en" -} ---- - - - -# Lateral View - -## description - -Lateral view syntax can be used with Table Function to fulfill the requirement of expanding one row of data into multiple rows (column to rows). - -grammar: - -``` -... -FROM table_name -lateral_view_ref[ lateral_view_ref ...] - -lateral_view_ref: - -LATERAL VIEW table_function(...) view_alias as col_name -``` - -The Lateral view clause must follow the table name or subquery. Can contain multiple Lateral view clauses. `view_alias` is the name of the corresponding Lateral View. `col_name` is the name of the column produced by the table function `table_function`. - -Table functions currently supported: - -1. `explode_split` -2. `explode_bitmap` -3. `explode_json_array` - -For specific function descriptions, please refer to the corresponding syntax help documentation. - -The data in the table will be Cartesian product with the result set produced by each Lateral View and then return to the upper level. - -## example - -Here, only the syntax example of Lateral View is given. For the specific meaning and output result description, please refer to the help document of the corresponding table function. - -1. - -``` -select k1, e1 from tbl1 -lateral view explode_split(v1,',') tmp1 as e1 where e1 = "abc"; -``` - -2. - -``` -select k1, e1, e2 from tbl2 -lateral view explode_split(v1,',') tmp1 as e1 -lateral view explode_bitmap(bitmap1) tmp2 as e2 -where e2> 3; -``` - -3. - -``` -select k1, e1, e2 from tbl3 -lateral view explode_json_array_int("[1,2,3]") tmp1 as e1 -lateral view explode_bitmap(bitmap_from_string("4,5,6")) tmp2 as e2; -``` - -4. 
- -``` -select k1, e1 from (select k1, bitmap_union(members) as x from tbl1 where k1=10000 group by k1)tmp1 -lateral view explode_bitmap(x) tmp2 as e1; -``` - -## keyword - - LATERAL, VIEW diff --git a/docs/en/sql-reference/sql-statements/Utility/util_stmt.md b/docs/en/sql-reference/sql-statements/Utility/util_stmt.md deleted file mode 100644 index 955b2dd9c1..0000000000 --- a/docs/en/sql-reference/sql-statements/Utility/util_stmt.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "Describe", - "language": "en" -} ---- - - - -# Describe -## Description -This statement is used to display schema information for the specified table -Grammar: -DESC [FISH] [dbu name.]table name [ALL]; - -Explain: -If ALL is specified, the schema of all indexes of the table is displayed - -## example - -## keyword -DESCRIBE,DESC diff --git a/new-docs/en/summary/basic-summary.md b/docs/en/summary/basic-summary.md similarity index 100% rename from new-docs/en/summary/basic-summary.md rename to docs/en/summary/basic-summary.md diff --git a/new-docs/en/summary/system-architecture.md b/docs/en/summary/system-architecture.md similarity index 95% rename from new-docs/en/summary/system-architecture.md rename to docs/en/summary/system-architecture.md index 2e9b3bb7b9..f7178ee8de 100644 --- a/new-docs/en/summary/system-architecture.md +++ b/docs/en/summary/system-architecture.md @@ -24,4 +24,6 @@ specific language governing permissions and limitations under the License. --> -# Doris system architecture \ No newline at end of file +# Doris system architecture + +(TODO) \ No newline at end of file diff --git a/docs/zh-CN/README.md b/docs/zh-CN/README.md index cf45c79ee8..d9b222a88a 100644 --- a/docs/zh-CN/README.md +++ b/docs/zh-CN/README.md @@ -89,7 +89,7 @@ cases: - logo: /images/home/logo-youdao.png alt: 有道 actionText: 快速上手 → -actionLink: /zh-CN/installing/compilation +actionLink: /zh-CN/get-starting/get-starting articleText: 最新动态 articleLink: /zh-CN/article/article-list --- diff --git a/new-docs/zh-CN/admin-manual/cluster-management/elastic-expansion.md b/docs/zh-CN/admin-manual/cluster-management/elastic-expansion.md similarity index 100% rename from new-docs/zh-CN/admin-manual/cluster-management/elastic-expansion.md rename to docs/zh-CN/admin-manual/cluster-management/elastic-expansion.md diff --git a/new-docs/zh-CN/admin-manual/cluster-management/load-balancing.md b/docs/zh-CN/admin-manual/cluster-management/load-balancing.md similarity index 100% rename from new-docs/zh-CN/admin-manual/cluster-management/load-balancing.md rename to docs/zh-CN/admin-manual/cluster-management/load-balancing.md diff --git a/new-docs/zh-CN/admin-manual/cluster-management/upgrade.md b/docs/zh-CN/admin-manual/cluster-management/upgrade.md similarity index 100% rename from new-docs/zh-CN/admin-manual/cluster-management/upgrade.md rename to docs/zh-CN/admin-manual/cluster-management/upgrade.md diff --git a/new-docs/zh-CN/admin-manual/config/be-config.md b/docs/zh-CN/admin-manual/config/be-config.md similarity index 100% rename from new-docs/zh-CN/admin-manual/config/be-config.md rename to docs/zh-CN/admin-manual/config/be-config.md diff --git a/new-docs/zh-CN/admin-manual/config/fe-config.md b/docs/zh-CN/admin-manual/config/fe-config.md similarity index 100% rename from new-docs/zh-CN/admin-manual/config/fe-config.md rename to docs/zh-CN/admin-manual/config/fe-config.md diff --git a/new-docs/zh-CN/admin-manual/config/user-property.md b/docs/zh-CN/admin-manual/config/user-property.md similarity index 100% rename from 
new-docs/zh-CN/admin-manual/config/user-property.md rename to docs/zh-CN/admin-manual/config/user-property.md diff --git a/new-docs/zh-CN/admin-manual/data-admin/backup.md b/docs/zh-CN/admin-manual/data-admin/backup.md similarity index 100% rename from new-docs/zh-CN/admin-manual/data-admin/backup.md rename to docs/zh-CN/admin-manual/data-admin/backup.md diff --git a/new-docs/zh-CN/admin-manual/data-admin/delete-recover.md b/docs/zh-CN/admin-manual/data-admin/delete-recover.md similarity index 100% rename from new-docs/zh-CN/admin-manual/data-admin/delete-recover.md rename to docs/zh-CN/admin-manual/data-admin/delete-recover.md diff --git a/new-docs/zh-CN/admin-manual/data-admin/restore.md b/docs/zh-CN/admin-manual/data-admin/restore.md similarity index 100% rename from new-docs/zh-CN/admin-manual/data-admin/restore.md rename to docs/zh-CN/admin-manual/data-admin/restore.md diff --git a/docs/zh-CN/administrator-guide/http-actions/cancel-label.md b/docs/zh-CN/admin-manual/http-actions/cancel-label.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/cancel-label.md rename to docs/zh-CN/admin-manual/http-actions/cancel-label.md diff --git a/docs/zh-CN/administrator-guide/http-actions/check-reset-rpc-cache.md b/docs/zh-CN/admin-manual/http-actions/check-reset-rpc-cache.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/check-reset-rpc-cache.md rename to docs/zh-CN/admin-manual/http-actions/check-reset-rpc-cache.md diff --git a/docs/zh-CN/administrator-guide/http-actions/compaction-action.md b/docs/zh-CN/admin-manual/http-actions/compaction-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/compaction-action.md rename to docs/zh-CN/admin-manual/http-actions/compaction-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/connection-action.md b/docs/zh-CN/admin-manual/http-actions/connection-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/connection-action.md rename to docs/zh-CN/admin-manual/http-actions/connection-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe-get-log-file.md b/docs/zh-CN/admin-manual/http-actions/fe-get-log-file.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe-get-log-file.md rename to docs/zh-CN/admin-manual/http-actions/fe-get-log-file.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/backends-action.md b/docs/zh-CN/admin-manual/http-actions/fe/backends-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/backends-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/backends-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/bootstrap-action.md b/docs/zh-CN/admin-manual/http-actions/fe/bootstrap-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/bootstrap-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/bootstrap-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/cancel-load-action.md b/docs/zh-CN/admin-manual/http-actions/fe/cancel-load-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/cancel-load-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/cancel-load-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/check-decommission-action.md b/docs/zh-CN/admin-manual/http-actions/fe/check-decommission-action.md similarity index 100% rename from 
docs/zh-CN/administrator-guide/http-actions/fe/check-decommission-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/check-decommission-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/check-storage-type-action.md b/docs/zh-CN/admin-manual/http-actions/fe/check-storage-type-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/check-storage-type-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/check-storage-type-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/config-action.md b/docs/zh-CN/admin-manual/http-actions/fe/config-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/config-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/config-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/connection-action.md b/docs/zh-CN/admin-manual/http-actions/fe/connection-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/connection-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/connection-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-ddl-stmt-action.md b/docs/zh-CN/admin-manual/http-actions/fe/get-ddl-stmt-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/get-ddl-stmt-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/get-ddl-stmt-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-load-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/get-load-info-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/get-load-info-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/get-load-info-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-load-state.md b/docs/zh-CN/admin-manual/http-actions/fe/get-load-state.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/get-load-state.md rename to docs/zh-CN/admin-manual/http-actions/fe/get-load-state.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-log-file-action.md b/docs/zh-CN/admin-manual/http-actions/fe/get-log-file-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/get-log-file-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/get-log-file-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/get-small-file.md b/docs/zh-CN/admin-manual/http-actions/fe/get-small-file.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/get-small-file.md rename to docs/zh-CN/admin-manual/http-actions/fe/get-small-file.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/ha-action.md b/docs/zh-CN/admin-manual/http-actions/fe/ha-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/ha-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/ha-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/hardware-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/hardware-info-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/hardware-info-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/hardware-info-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/health-action.md b/docs/zh-CN/admin-manual/http-actions/fe/health-action.md similarity index 100% rename from 
docs/zh-CN/administrator-guide/http-actions/fe/health-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/health-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/log-action.md b/docs/zh-CN/admin-manual/http-actions/fe/log-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/log-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/log-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/logout-action.md b/docs/zh-CN/admin-manual/http-actions/fe/logout-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/logout-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/logout-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/manager/cluster-action.md b/docs/zh-CN/admin-manual/http-actions/fe/manager/cluster-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/manager/cluster-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/manager/cluster-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/manager/node-action.md b/docs/zh-CN/admin-manual/http-actions/fe/manager/node-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/manager/node-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/manager/node-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/manager/query-profile-action.md b/docs/zh-CN/admin-manual/http-actions/fe/manager/query-profile-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/manager/query-profile-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/manager/query-profile-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/meta-action.md b/docs/zh-CN/admin-manual/http-actions/fe/meta-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/meta-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/meta-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/meta-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/meta-info-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/meta-info-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/meta-info-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/meta-replay-state-action.md b/docs/zh-CN/admin-manual/http-actions/fe/meta-replay-state-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/meta-replay-state-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/meta-replay-state-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/profile-action.md b/docs/zh-CN/admin-manual/http-actions/fe/profile-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/profile-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/profile-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/query-detail-action.md b/docs/zh-CN/admin-manual/http-actions/fe/query-detail-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/query-detail-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/query-detail-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/query-profile-action.md b/docs/zh-CN/admin-manual/http-actions/fe/query-profile-action.md similarity index 100% rename from 
docs/zh-CN/administrator-guide/http-actions/fe/query-profile-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/query-profile-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/row-count-action.md b/docs/zh-CN/admin-manual/http-actions/fe/row-count-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/row-count-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/row-count-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/session-action.md b/docs/zh-CN/admin-manual/http-actions/fe/session-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/session-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/session-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/set-config-action.md b/docs/zh-CN/admin-manual/http-actions/fe/set-config-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/set-config-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/set-config-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/show-data-action.md b/docs/zh-CN/admin-manual/http-actions/fe/show-data-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/show-data-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/show-data-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/show-meta-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/show-meta-info-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/show-meta-info-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/show-meta-info-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/show-proc-action.md b/docs/zh-CN/admin-manual/http-actions/fe/show-proc-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/show-proc-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/show-proc-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/show-runtime-info-action.md b/docs/zh-CN/admin-manual/http-actions/fe/show-runtime-info-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/show-runtime-info-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/show-runtime-info-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/statement-execution-action.md b/docs/zh-CN/admin-manual/http-actions/fe/statement-execution-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/statement-execution-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/statement-execution-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/system-action.md b/docs/zh-CN/admin-manual/http-actions/fe/system-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/system-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/system-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/table-query-plan-action.md b/docs/zh-CN/admin-manual/http-actions/fe/table-query-plan-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/table-query-plan-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/table-query-plan-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/table-row-count-action.md b/docs/zh-CN/admin-manual/http-actions/fe/table-row-count-action.md similarity index 100% 
rename from docs/zh-CN/administrator-guide/http-actions/fe/table-row-count-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/table-row-count-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/table-schema-action.md b/docs/zh-CN/admin-manual/http-actions/fe/table-schema-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/table-schema-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/table-schema-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/fe/upload-action.md b/docs/zh-CN/admin-manual/http-actions/fe/upload-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/fe/upload-action.md rename to docs/zh-CN/admin-manual/http-actions/fe/upload-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/get-load-state.md b/docs/zh-CN/admin-manual/http-actions/get-load-state.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/get-load-state.md rename to docs/zh-CN/admin-manual/http-actions/get-load-state.md diff --git a/docs/zh-CN/administrator-guide/http-actions/get-tablets.md b/docs/zh-CN/admin-manual/http-actions/get-tablets.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/get-tablets.md rename to docs/zh-CN/admin-manual/http-actions/get-tablets.md diff --git a/docs/zh-CN/administrator-guide/http-actions/profile-action.md b/docs/zh-CN/admin-manual/http-actions/profile-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/profile-action.md rename to docs/zh-CN/admin-manual/http-actions/profile-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/query-detail-action.md b/docs/zh-CN/admin-manual/http-actions/query-detail-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/query-detail-action.md rename to docs/zh-CN/admin-manual/http-actions/query-detail-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/restore-tablet.md b/docs/zh-CN/admin-manual/http-actions/restore-tablet.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/restore-tablet.md rename to docs/zh-CN/admin-manual/http-actions/restore-tablet.md diff --git a/docs/zh-CN/administrator-guide/http-actions/show-data-action.md b/docs/zh-CN/admin-manual/http-actions/show-data-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/show-data-action.md rename to docs/zh-CN/admin-manual/http-actions/show-data-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/tablet-migration-action.md b/docs/zh-CN/admin-manual/http-actions/tablet-migration-action.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/tablet-migration-action.md rename to docs/zh-CN/admin-manual/http-actions/tablet-migration-action.md diff --git a/docs/zh-CN/administrator-guide/http-actions/tablets_distribution.md b/docs/zh-CN/admin-manual/http-actions/tablets_distribution.md similarity index 100% rename from docs/zh-CN/administrator-guide/http-actions/tablets_distribution.md rename to docs/zh-CN/admin-manual/http-actions/tablets_distribution.md diff --git a/new-docs/zh-CN/admin-manual/maint-monitor/be-olap-error-code.md b/docs/zh-CN/admin-manual/maint-monitor/be-olap-error-code.md similarity index 100% rename from new-docs/zh-CN/admin-manual/maint-monitor/be-olap-error-code.md rename to docs/zh-CN/admin-manual/maint-monitor/be-olap-error-code.md diff --git 
a/new-docs/zh-CN/admin-manual/maint-monitor/disk-capacity.md b/docs/zh-CN/admin-manual/maint-monitor/disk-capacity.md similarity index 100% rename from new-docs/zh-CN/admin-manual/maint-monitor/disk-capacity.md rename to docs/zh-CN/admin-manual/maint-monitor/disk-capacity.md diff --git a/new-docs/zh-CN/admin-manual/maint-monitor/doris-error-code.md b/docs/zh-CN/admin-manual/maint-monitor/doris-error-code.md similarity index 100% rename from new-docs/zh-CN/admin-manual/maint-monitor/doris-error-code.md rename to docs/zh-CN/admin-manual/maint-monitor/doris-error-code.md diff --git a/new-docs/zh-CN/admin-manual/maint-monitor/metadata-operation.md b/docs/zh-CN/admin-manual/maint-monitor/metadata-operation.md similarity index 100% rename from new-docs/zh-CN/admin-manual/maint-monitor/metadata-operation.md rename to docs/zh-CN/admin-manual/maint-monitor/metadata-operation.md diff --git a/docs/zh-CN/administrator-guide/operation/monitor-alert.md b/docs/zh-CN/admin-manual/maint-monitor/monitor-alert.md similarity index 100% rename from docs/zh-CN/administrator-guide/operation/monitor-alert.md rename to docs/zh-CN/admin-manual/maint-monitor/monitor-alert.md diff --git a/docs/zh-CN/administrator-guide/operation/monitor-metrics/be-metrics.md b/docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/be-metrics.md similarity index 100% rename from docs/zh-CN/administrator-guide/operation/monitor-metrics/be-metrics.md rename to docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/be-metrics.md diff --git a/docs/zh-CN/administrator-guide/operation/monitor-metrics/fe-metrics.md b/docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md similarity index 100% rename from docs/zh-CN/administrator-guide/operation/monitor-metrics/fe-metrics.md rename to docs/zh-CN/admin-manual/maint-monitor/monitor-metrics/fe-metrics.md diff --git a/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md b/docs/zh-CN/admin-manual/maint-monitor/tablet-meta-tool.md similarity index 100% rename from docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md rename to docs/zh-CN/admin-manual/maint-monitor/tablet-meta-tool.md diff --git a/new-docs/zh-CN/admin-manual/maint-monitor/tablet-repair-and-balance.md b/docs/zh-CN/admin-manual/maint-monitor/tablet-repair-and-balance.md similarity index 100% rename from new-docs/zh-CN/admin-manual/maint-monitor/tablet-repair-and-balance.md rename to docs/zh-CN/admin-manual/maint-monitor/tablet-repair-and-balance.md diff --git a/docs/zh-CN/administrator-guide/operation/tablet-restore-tool.md b/docs/zh-CN/admin-manual/maint-monitor/tablet-restore-tool.md similarity index 100% rename from docs/zh-CN/administrator-guide/operation/tablet-restore-tool.md rename to docs/zh-CN/admin-manual/maint-monitor/tablet-restore-tool.md diff --git a/new-docs/zh-CN/admin-manual/multi-tenant.md b/docs/zh-CN/admin-manual/multi-tenant.md similarity index 100% rename from new-docs/zh-CN/admin-manual/multi-tenant.md rename to docs/zh-CN/admin-manual/multi-tenant.md diff --git a/new-docs/zh-CN/admin-manual/optimization.md b/docs/zh-CN/admin-manual/optimization.md similarity index 100% rename from new-docs/zh-CN/admin-manual/optimization.md rename to docs/zh-CN/admin-manual/optimization.md diff --git a/new-docs/zh-CN/admin-manual/privilege-ldap/ldap.md b/docs/zh-CN/admin-manual/privilege-ldap/ldap.md similarity index 100% rename from new-docs/zh-CN/admin-manual/privilege-ldap/ldap.md rename to docs/zh-CN/admin-manual/privilege-ldap/ldap.md diff --git 
a/new-docs/zh-CN/admin-manual/privilege-ldap/user-privilege.md b/docs/zh-CN/admin-manual/privilege-ldap/user-privilege.md similarity index 100% rename from new-docs/zh-CN/admin-manual/privilege-ldap/user-privilege.md rename to docs/zh-CN/admin-manual/privilege-ldap/user-privilege.md diff --git a/docs/zh-CN/administrator-guide/running-profile.md b/docs/zh-CN/admin-manual/query-profile.md similarity index 99% rename from docs/zh-CN/administrator-guide/running-profile.md rename to docs/zh-CN/admin-manual/query-profile.md index 9277ab4ae2..a2d765a4e5 100644 --- a/docs/zh-CN/administrator-guide/running-profile.md +++ b/docs/zh-CN/admin-manual/query-profile.md @@ -1,6 +1,6 @@ --- { - "title": "查询执行的统计", + "title": "查询分析", "language": "zh-CN" } --- @@ -26,6 +26,8 @@ under the License. # 查询执行的统计 +# 查询执行的统计 + 本文档主要介绍Doris在查询执行的统计结果。利用这些统计的信息,可以更好的帮助我们了解Doris的执行情况,并有针对性的进行相应**Debug与调优工作**。 @@ -271,5 +273,4 @@ OLAP_SCAN_NODE (id=0):(Active: 1.2ms, % non-child: 0.00%) - PeakReservation: Reservation的峰值 - PeakUnpinnedBytes: unpin的内存数据量 - PeakUsedReservation: Reservation的内存使用量 - - ReservationLimit: BufferPool的Reservation的限制量 - + - ReservationLimit: BufferPool的Reservation的限制量 \ No newline at end of file diff --git a/new-docs/zh-CN/admin-manual/sql-interception.md b/docs/zh-CN/admin-manual/sql-interception.md similarity index 100% rename from new-docs/zh-CN/admin-manual/sql-interception.md rename to docs/zh-CN/admin-manual/sql-interception.md diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-bitmap-index.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-bitmap-index.md deleted file mode 100644 index 5e7853c0df..0000000000 --- a/docs/zh-CN/administrator-guide/alter-table/alter-table-bitmap-index.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -{ - "title": "Bitmap 索引", - "language": "zh-CN" -} ---- - - - -# Bitmap 索引 -用户可以通过创建bitmap index 加速查询 -本文档主要介绍如何创建 index 作业,以及创建 index 的一些注意事项和常见问题。 - -## 名词解释 -* bitmap index:位图索引,是一种快速数据结构,能够加快查询速度 - -## 原理介绍 -创建和删除本质上是一个 schema change 的作业,具体细节可以参照 [Schema Change](alter-table-schema-change)。 - -## 语法 -index 创建和修改相关语法有两种形式,一种集成与 alter table 语句中,另一种是使用单独的 -create/drop index 语法 -1. 创建索引 - - 创建索引的的语法可以参见 [CREATE INDEX](../../sql-reference/sql-statements/Data%20Definition/CREATE%20INDEX.html) - 或 [ALTER TABLE](../../sql-reference/sql-statements/Data%20Definition/ALTER%20TABLE.html) 中bitmap 索引相关的操作, - 也可以通过在创建表时指定bitmap 索引,参见[CREATE TABLE](../../sql-reference/sql-statements/Data%20Definition/CREATE%20TABLE.html) - -2. 查看索引 - - 参照[SHOW INDEX](../../sql-reference/sql-statements/Administration/SHOW%20INDEX.html) - -3. 
删除索引 - - 参照[DROP INDEX](../../sql-reference/sql-statements/Data%20Definition/DROP%20INDEX.html) - 或者 [ALTER TABLE](../../sql-reference/sql-statements/Data%20Definition/ALTER%20TABLE.html) 中bitmap 索引相关的操作 - -## 创建作业 -参照 schema change 文档 [Schema Change](alter-table-schema-change.html) - -## 查看作业 -参照 schema change 文档 [Schema Change](alter-table-schema-change.html) - -## 取消作业 -参照 schema change 文档 [Schema Change](alter-table-schema-change.html) - -## 注意事项 -* 目前索引仅支持 bitmap 类型的索引。 -* bitmap 索引仅在单列上创建。 -* bitmap 索引能够应用在 `Duplicate` 数据模型的所有列和 `Aggregate`, `Uniq` 模型的key列上。 -* bitmap 索引支持的数据类型如下: - * `TINYINT` - * `SMALLINT` - * `INT` - * `UNSIGNEDINT` - * `BIGINT` - * `CHAR` - * `VARCHAR` - * `DATE` - * `DATETIME` - * `LARGEINT` - * `DECIMAL` - * `BOOL` - -* bitmap索引仅在 Segment V2 下生效。当创建 index 时,表的存储格式将默认转换为 V2 格式。 diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-replace-table.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-replace-table.md deleted file mode 100644 index ce47723918..0000000000 --- a/docs/zh-CN/administrator-guide/alter-table/alter-table-replace-table.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -{ - "title": "替换表", - "language": "zh-CN" -} ---- - - - -# 替换表 - -在 0.14 版本中,Doris 支持对两个表进行原子的替换操作。 -该操作仅适用于 OLAP 表。 - -分区级别的替换操作,请参阅 [临时分区文档](./alter-table-temp-partition.md) - -## 语法说明 - -``` -ALTER TABLE [db.]tbl1 REPLACE WITH TABLE tbl2 -[PROPERTIES('swap' = 'true')]; -``` - -将表 tbl1 替换为表 tbl2。 - -如果 `swap` 参数为 `true`,则替换后,名称为 `tbl1` 表中的数据为原 `tbl2` 表中的数据。而名称为 `tbl2` 表中的数据为原 `tbl1` 表中的数据。即两张表数据发生了互换。 - -如果 `swap` 参数为 `false`,则替换后,名称为 `tbl1` 表中的数据为原 `tbl2` 表中的数据。而名称为 `tbl2` 表被删除。 - - -## 原理 - -替换表功能,实际上是将以下操作集合变成一个原子操作。 - -假设要将表 A 替换为表 B,且 `swap` 为 `true`,则操作如下: - -1. 将表 B 重名为表 A。 -2. 将表 A 重名为表 B。 - -如果 `swap` 为 `false`,则操作如下: - -1. 删除表 A。 -2. 将表 B 重名为表 A。 - -## 注意事项 - -1. `swap` 参数默认为 `true`。即替换表操作相当于将两张表数据进行交换。 -2. 如果设置 `swap` 参数为 `false`,则被替换的表(表A)将被删除,且无法恢复。 -3. 替换操作仅能发生在两张 OLAP 表之间,且不会检查两张表的表结构是否一致。 -4. 替换操作不会改变原有的权限设置。因为权限检查以表名称为准。 - -## 最佳实践 - -1. 
原子的覆盖写操作 - - 某些情况下,用户希望能够重写某张表的数据,但如果采用先删除再导入的方式进行,在中间会有一段时间无法查看数据。这时,用户可以先使用 `CREATE TABLE LIKE` 语句创建一个相同结构的新表,将新的数据导入到新表后,通过替换操作,原子的替换旧表,以达到目的。分区级别的原子覆盖写操作,请参阅 [临时分区文档](./alter-table-temp-partition.md) diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-rollup.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-rollup.md deleted file mode 100644 index 7d571a2d8a..0000000000 --- a/docs/zh-CN/administrator-guide/alter-table/alter-table-rollup.md +++ /dev/null @@ -1,194 +0,0 @@ ---- -{ - "title": "Rollup", - "language": "zh-CN" -} ---- - - - -# Rollup - -用户可以通过创建上卷表(Rollup)加速查询。关于 Rollup 的概念和使用方式可以参阅 [数据模型、ROLLUP 及前缀索引](../../getting-started/data-model-rollup.md) 和 [Rollup 与查询](../../getting-started/hit-the-rollup.md) 两篇文档。 - -本文档主要介绍如何创建 Rollup 作业,以及创建 Rollup 的一些注意事项和常见问题。 - -## 名词解释 - -* Base Table:基表。每一个表被创建时,都对应一个基表。基表存储了这个表的完整的数据。Rollup 通常基于基表中的数据创建(也可以通过其他 Rollup 创建)。 -* Index:物化索引。Rollup 或 Base Table 都被称为物化索引。 -* Transaction:事务。每一个导入任务都是一个事务,每个事务有一个唯一递增的 Transaction ID。 - -## 原理介绍 - -创建 Rollup 的基本过程,是通过 Base 表的数据,生成一份新的包含指定列的 Rollup 的数据。其中主要需要进行两部分数据转换,一是已存在的历史数据的转换,二是在 Rollup 执行过程中,新到达的导入数据的转换。 - -``` -+----------+ -| Load Job | -+----+-----+ - | - | Load job generates both base and rollup index data - | - | +------------------+ +---------------+ - | | Base Index | | Base Index | - +------> New Incoming Data| | History Data | - | +------------------+ +------+--------+ - | | - | | Convert history data - | | - | +------------------+ +------v--------+ - | | Rollup Index | | Rollup Index | - +------> New Incoming Data| | History Data | - +------------------+ +---------------+ -``` - -在开始转换历史数据之前,Doris 会获取一个最新的 Transaction ID。并等待这个 Transaction ID 之前的所有导入事务完成。这个 Transaction ID 成为分水岭。意思是,Doris 保证在分水岭之后的所有导入任务,都会同时为 Rollup Index 生成数据。这样当历史数据转换完成后,可以保证 Rollup 和 Base 表的数据是齐平的。 - -## 创建作业 - -创建 Rollup 的具体语法可以查看帮助 `HELP ALTER TABLE` 中 Rollup 部分的说明。 - -Rollup 的创建是一个异步过程,作业提交成功后,用户需要通过 `SHOW ALTER TABLE ROLLUP` 命令来查看作业进度。 - -## 查看作业 - -`SHOW ALTER TABLE ROLLUP` 可以查看当前正在执行或已经完成的 Rollup 作业。举例如下: - -``` - JobId: 20037 - TableName: tbl1 - CreateTime: 2019-08-06 15:38:49 - FinishedTime: N/A - BaseIndexName: tbl1 -RollupIndexName: r1 - RollupId: 20038 - TransactionId: 10034 - State: PENDING - Msg: - Progress: N/A - Timeout: 86400 -``` - -* JobId:每个 Rollup 作业的唯一 ID。 -* TableName:Rollup 对应的基表的表名。 -* CreateTime:作业创建时间。 -* FinishedTime:作业结束时间。如未结束,则显示 "N/A"。 -* BaseIndexName:Rollup 对应的源 Index 的名称。 -* RollupIndexName:Rollup 的名称。 -* RollupId:Rollup 的唯一 ID。 -* TransactionId:转换历史数据的分水岭 transaction ID。 -* State:作业所在阶段。 - * PENDING:作业在队列中等待被调度。 - * WAITING_TXN:等待分水岭 transaction ID 之前的导入任务完成。 - * RUNNING:历史数据转换中。 - * FINISHED:作业成功。 - * CANCELLED:作业失败。 -* Msg:如果作业失败,这里会显示失败信息。 -* Progress:作业进度。只有在 RUNNING 状态才会显示进度。进度是以 M/N 的形式显示。其中 N 为 Rollup 的总副本数。M 为已完成历史数据转换的副本数。 -* Timeout:作业超时时间。单位秒。 - -## 取消作业 - -在作业状态不为 FINISHED 或 CANCELLED 的情况下,可以通过以下命令取消 Rollup 作业: - -`CANCEL ALTER TABLE ROLLUP FROM tbl_name;` - -## 注意事项 - -* 一张表在同一时间只能有一个 Rollup 作业在运行。且一个作业中只能创建一个 Rollup。 - -* Rollup 操作不阻塞导入和查询操作。 - -* 如果 DELETE 操作,where 条件中的某个 Key 列在某个 Rollup 中不存在,则不允许该 DELETE。 - - 如果某个 Key 列在某一 Rollup 中不存在,则 DELETE 操作无法对该 Rollup 进行数据删除,从而无法保证 Rollup 表和 Base 表的数据一致性。 - -* Rollup 的列必须存在于 Base 表中。 - - Rollup 的列永远是 Base 表列的子集。不能出现 Base 表中不存在的列。 - -* 如果 Rollup 中包含 REPLACE 聚合类型的列,则该 Rollup 必须包含所有 Key 列。 - - 假设 Base 表结构如下: - - ```(k1 INT, k2 INT, v1 INT REPLACE, v2 INT SUM)``` - - 如果需要创建的 Rollup 包含 `v1` 列,则必须包含 `k1`, `k2` 列。否则系统无法决定 `v1` 列在 Rollup 中的取值。 - - 注意,Unique 数据模型表中的所有 Value 列都是 REPLACE 聚合类型。 - -* DUPLICATE 
数据模型表的 Rollup,可以指定 Rollup 的 DUPLICATE KEY。 - - DUPLICATE 数据模型表中的 DUPLICATE KEY 其实就是排序列。Rollup 可以指定自己的排序列,但排序列必须是 Rollup 列顺序的前缀。如果不指定,则系统会检查 Rollup 是否包含了 Base 表的所有排序列,如果没有包含,则会报错。举例: - - Base 表结构:`(k1 INT, k2 INT, k3 INT) DUPLICATE KEY(k1, k2)` - - 则 Rollup 可以为:`(k2 INT, k1 INT) DUPLICATE KEY(k2)` - -* Rollup 不需要包含 Base 表的分区列或分桶列。 - -## 常见问题 - -* 一个表可以创建多少 Rollup - - 一个表能够创建的 Rollup 个数理论上没有限制,但是过多的 Rollup 会影响导入性能。因为导入时,会同时给所有 Rollup 产生数据。同时 Rollup 会占用物理存储空间。通常一个表的 Rollup 数量在 10 个以内比较合适。 - -* Rollup 创建的速度 - - 目前 Rollup 创建速度按照最差效率估计约为 10MB/s。保守起见,用户可以根据这个速率来设置作业的超时时间。 - -* 提交作业报错 `Table xxx is not stable. ...` - - Rollup 只有在表数据完整且非均衡状态下才可以开始。如果表的某些数据分片副本不完整,或者某些副本正在进行均衡操作,则提交会被拒绝。 - - 数据分片副本是否完整,可以通过以下命令查看: - - ```ADMIN SHOW REPLICA STATUS FROM tbl WHERE STATUS != "OK";``` - - 如果有返回结果,则说明有副本有问题。通常系统会自动修复这些问题,用户也可以通过以下命令优先修复这个表: - - ```ADMIN REPAIR TABLE tbl1;``` - - 用户可以通过以下命令查看是否有正在运行的均衡任务: - - ```SHOW PROC "/cluster_balance/pending_tablets";``` - - 可以等待均衡任务完成,或者通过以下命令临时禁止均衡操作: - - ```ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");``` - -## 相关配置 - -### FE 配置 - -* `alter_table_timeout_second`:作业默认超时时间,86400 秒。 - -### BE 配置 - -* `alter_tablet_worker_count`:在 BE 端用于执行历史数据转换的线程数。默认为 3。如果希望加快 Rollup 作业的速度,可以适当调大这个参数后重启 BE。但过多的转换线程可能会导致 IO 压力增加,影响其他操作。该线程和 Schema Change 作业共用。 - - - - - - - - diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-schema-change.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-schema-change.md deleted file mode 100644 index 6ab14cc764..0000000000 --- a/docs/zh-CN/administrator-guide/alter-table/alter-table-schema-change.md +++ /dev/null @@ -1,249 +0,0 @@ ---- -{ - "title": "Schema Change", - "language": "zh-CN" -} ---- - - - -# Schema Change - -用户可以通过 Schema Change 操作来修改已存在表的 Schema。目前 Doris 支持以下几种修改: - -* 增加、删除列 -* 修改列类型 -* 调整列顺序 -* 增加、修改 Bloom Filter -* 增加、删除 bitmap index - -本文档主要介绍如何创建 Schema Change 作业,以及进行 Schema Change 的一些注意事项和常见问题。 - -## 名词解释 - -* Base Table:基表。每一个表被创建时,都对应一个基表。 -* Rollup:基于基表或者其他 Rollup 创建出来的上卷表。 -* Index:物化索引。Rollup 或 Base Table 都被称为物化索引。 -* Transaction:事务。每一个导入任务都是一个事务,每个事务有一个唯一递增的 Transaction ID。 - -## 原理介绍 - -执行 Schema Change 的基本过程,是通过原 Index 的数据,生成一份新 Schema 的 Index 的数据。其中主要需要进行两部分数据转换,一是已存在的历史数据的转换,二是在 Schema Change 执行过程中,新到达的导入数据的转换。 - -``` -+----------+ -| Load Job | -+----+-----+ - | - | Load job generates both origin and new index data - | - | +------------------+ +---------------+ - | | Origin Index | | Origin Index | - +------> New Incoming Data| | History Data | - | +------------------+ +------+--------+ - | | - | | Convert history data - | | - | +------------------+ +------v--------+ - | | New Index | | New Index | - +------> New Incoming Data| | History Data | - +------------------+ +---------------+ -``` - -在开始转换历史数据之前,Doris 会获取一个最新的 Transaction ID。并等待这个 Transaction ID 之前的所有导入事务完成。这个 Transaction ID 成为分水岭。意思是,Doris 保证在分水岭之后的所有导入任务,都会同时为原 Index 和新 Index 生成数据。这样当历史数据转换完成后,可以保证新的 Index 中的数据是完整的。 - -## 创建作业 - -创建 Schema Change 的具体语法可以查看帮助 `HELP ALTER TABLE` 中 Schema Change 部分的说明。 - -Schema Change 的创建是一个异步过程,作业提交成功后,用户需要通过 `SHOW ALTER TABLE COLUMN` 命令来查看作业进度。 - -## 查看作业 - -`SHOW ALTER TABLE COLUMN` 可以查看当前正在执行或已经完成的 Schema Change 作业。当一次 Schema Change 作业涉及到多个 Index 时,该命令会显示多行,每行对应一个 Index。举例如下: - -``` - JobId: 20021 - TableName: tbl1 - CreateTime: 2019-08-05 23:03:13 - FinishTime: 2019-08-05 23:03:42 - IndexName: tbl1 - IndexId: 20022 -OriginIndexId: 20017 -SchemaVersion: 2:792557838 -TransactionId: 10023 - State: FINISHED - Msg: - Progress: N/A - Timeout: 86400 -``` - -* JobId:每个 Schema Change 作业的唯一 ID。 
-* TableName:Schema Change 对应的基表的表名。 -* CreateTime:作业创建时间。 -* FinishedTime:作业结束时间。如未结束,则显示 "N/A"。 -* IndexName: 本次修改所涉及的某一个 Index 的名称。 -* IndexId:新的 Index 的唯一 ID。 -* OriginIndexId:旧的 Index 的唯一 ID。 -* SchemaVersion:以 M:N 的格式展示。其中 M 表示本次 Schema Change 变更的版本,N 表示对应的 Hash 值。每次 Schema Change,版本都会递增。 -* TransactionId:转换历史数据的分水岭 transaction ID。 -* State:作业所在阶段。 - * PENDING:作业在队列中等待被调度。 - * WAITING_TXN:等待分水岭 transaction ID 之前的导入任务完成。 - * RUNNING:历史数据转换中。 - * FINISHED:作业成功。 - * CANCELLED:作业失败。 -* Msg:如果作业失败,这里会显示失败信息。 -* Progress:作业进度。只有在 RUNNING 状态才会显示进度。进度是以 M/N 的形式显示。其中 N 为 Schema Change 涉及的总副本数。M 为已完成历史数据转换的副本数。 -* Timeout:作业超时时间。单位秒。 - -## 取消作业 - -在作业状态不为 FINISHED 或 CANCELLED 的情况下,可以通过以下命令取消 Schema Change 作业: - -`CANCEL ALTER TABLE COLUMN FROM tbl_name;` - -## 最佳实践 - -Schema Change 可以在一个作业中,对多个 Index 进行不同的修改。举例如下: - -源 Schema: - -``` -+-----------+-------+------+------+------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+-----------+-------+------+------+------+---------+-------+ -| tbl1 | k1 | INT | No | true | N/A | | -| | k2 | INT | No | true | N/A | | -| | k3 | INT | No | true | N/A | | -| | | | | | | | -| rollup2 | k2 | INT | No | true | N/A | | -| | | | | | | | -| rollup1 | k1 | INT | No | true | N/A | | -| | k2 | INT | No | true | N/A | | -+-----------+-------+------+------+------+---------+-------+ -``` - -可以通过以下命令给 rollup1 和 rollup2 都加入一列 k4,并且再给 rollup2 加入一列 k5: - -``` -ALTER TABLE tbl1 -ADD COLUMN k4 INT default "1" to rollup1, -ADD COLUMN k4 INT default "1" to rollup2, -ADD COLUMN k5 INT default "1" to rollup2; -``` - -完成后,Schema 变为: - -``` -+-----------+-------+------+------+------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+-----------+-------+------+------+------+---------+-------+ -| tbl1 | k1 | INT | No | true | N/A | | -| | k2 | INT | No | true | N/A | | -| | k3 | INT | No | true | N/A | | -| | k4 | INT | No | true | 1 | | -| | k5 | INT | No | true | 1 | | -| | | | | | | | -| rollup2 | k2 | INT | No | true | N/A | | -| | k4 | INT | No | true | 1 | | -| | k5 | INT | No | true | 1 | | -| | | | | | | | -| rollup1 | k1 | INT | No | true | N/A | | -| | k2 | INT | No | true | N/A | | -| | k4 | INT | No | true | 1 | | -+-----------+-------+------+------+------+---------+-------+ -``` - -可以看到,Base 表 tbl1 也自动加入了 k4, k5 列。即给任意 rollup 增加的列,都会自动加入到 Base 表中。 - -同时,不允许向 Rollup 中加入 Base 表已经存在的列。如果用户需要这样做,可以重新建立一个包含新增列的 Rollup,之后再删除原 Rollup。 - -## 注意事项 - -* 一张表在同一时间只能有一个 Schema Change 作业在运行。 - -* Schema Change 操作不阻塞导入和查询操作。 - -* 分区列和分桶列不能修改。 - -* 如果 Schema 中有 REPLACE 方式聚合的 value 列,则不允许删除 Key 列。 - - 如果删除 Key 列,Doris 无法决定 REPLACE 列的取值。 - - Unique 数据模型表的所有非 Key 列都是 REPLACE 聚合方式。 - -* 在新增聚合类型为 SUM 或者 REPLACE 的 value 列时,该列的默认值对历史数据没有含义。 - - 因为历史数据已经失去明细信息,所以默认值的取值并不能实际反映聚合后的取值。 - -* 当修改列类型时,除 Type 以外的字段都需要按原列上的信息补全。 - - 如修改列 `k1 INT SUM NULL DEFAULT "1"` 类型为 BIGINT,则需执行命令如下: - - ```ALTER TABLE tbl1 MODIFY COLUMN `k1` BIGINT SUM NULL DEFAULT "1";``` - - 注意,除新的列类型外,如聚合方式,Nullable 属性,以及默认值都要按照原信息补全。 - -* 不支持修改列名称、聚合类型、Nullable 属性、默认值以及列注释。 - -## 常见问题 - -* Schema Change 的执行速度 - - 目前 Schema Change 执行速度按照最差效率估计约为 10MB/s。保守起见,用户可以根据这个速率来设置作业的超时时间。 - -* 提交作业报错 `Table xxx is not stable. 
...` - - Schema Change 只有在表数据完整且非均衡状态下才可以开始。如果表的某些数据分片副本不完整,或者某些副本正在进行均衡操作,则提交会被拒绝。 - - 数据分片副本是否完整,可以通过以下命令查看: - - ```ADMIN SHOW REPLICA STATUS FROM tbl WHERE STATUS != "OK";``` - - 如果有返回结果,则说明有副本有问题。通常系统会自动修复这些问题,用户也可以通过以下命令优先修复这个表: - - ```ADMIN REPAIR TABLE tbl1;``` - - 用户可以通过以下命令查看是否有正在运行的均衡任务: - - ```SHOW PROC "/cluster_balance/pending_tablets";``` - - 可以等待均衡任务完成,或者通过以下命令临时禁止均衡操作: - - ```ADMIN SET FRONTEND CONFIG ("disable_balance" = "true");``` - -## 相关配置 - -### FE 配置 - -* `alter_table_timeout_second`:作业默认超时时间,86400 秒。 - -### BE 配置 - -* `alter_tablet_worker_count`:在 BE 端用于执行历史数据转换的线程数。默认为 3。如果希望加快 Schema Change 作业的速度,可以适当调大这个参数后重启 BE。但过多的转换线程可能会导致 IO 压力增加,影响其他操作。该线程和 Rollup 作业共用。 - - - - - - - - diff --git a/docs/zh-CN/administrator-guide/alter-table/alter-table-temp-partition.md b/docs/zh-CN/administrator-guide/alter-table/alter-table-temp-partition.md deleted file mode 100644 index b46759642f..0000000000 --- a/docs/zh-CN/administrator-guide/alter-table/alter-table-temp-partition.md +++ /dev/null @@ -1,298 +0,0 @@ ---- -{ - "title": "临时分区", - "language": "zh-CN" -} ---- - - - -# 临时分区 - -在 0.12 版本中,Doris 支持了临时分区功能。 - -临时分区是归属于某一分区表的。只有分区表可以创建临时分区。 - -## 规则 - -* 临时分区的分区列和正式分区相同,且不可修改。 -* 一张表所有临时分区之间的分区范围不可重叠,但临时分区的范围和正式分区范围可以重叠。 -* 临时分区的分区名称不能和正式分区以及其他临时分区重复。 - -## 支持的操作 - -临时分区支持添加、删除、替换操作。 - -### 添加临时分区 - -可以通过 `ALTER TABLE ADD TEMPORARY PARTITION` 语句对一个表添加临时分区: - -``` -ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES LESS THAN("2020-02-01"); - -ALTER TABLE tbl2 ADD TEMPORARY PARTITION tp1 VALUES [("2020-01-01"), ("2020-02-01")); - -ALTER TABLE tbl1 ADD TEMPORARY PARTITION tp1 VALUES LESS THAN("2020-02-01") -("in_memory" = "true", "replication_num" = "1") -DISTRIBUTED BY HASH(k1) BUCKETS 5; - -ALTER TABLE tbl3 ADD TEMPORARY PARTITION tp1 VALUES IN ("Beijing", "Shanghai"); - -ALTER TABLE tbl4 ADD TEMPORARY PARTITION tp1 VALUES IN ((1, "Beijing"), (1, "Shanghai")); - -ALTER TABLE tbl3 ADD TEMPORARY PARTITION tp1 VALUES IN ("Beijing", "Shanghai") -("in_memory" = "true", "replication_num" = "1") -DISTRIBUTED BY HASH(k1) BUCKETS 5; - -``` - -通过 `HELP ALTER TABLE;` 查看更多帮助和示例。 - -添加操作的一些说明: - -* 临时分区的添加和正式分区的添加操作相似。临时分区的分区范围独立于正式分区。 -* 临时分区可以独立指定一些属性。包括分桶数、副本数、是否是内存表、存储介质等信息。 - -### 删除临时分区 - -可以通过 `ALTER TABLE DROP TEMPORARY PARTITION` 语句删除一个表的临时分区: - -``` -ALTER TABLE tbl1 DROP TEMPORARY PARTITION tp1; -``` - -通过 `HELP ALTER TABLE;` 查看更多帮助和示例。 - -删除操作的一些说明: - -* 删除临时分区,不影响正式分区的数据。 - -### 替换分区 - -可以通过 `ALTER TABLE REPLACE PARTITION` 语句将一个表的正式分区替换为临时分区。 - -``` -ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1); - -ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1, tp2, tp3); - -ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1, tp2) -PROPERTIES ( - "strict_range" = "false", - "use_temp_partition_name" = "true" -); -``` - -通过 `HELP ALTER TABLE;` 查看更多帮助和示例。 - -替换操作有两个特殊的可选参数: - -1. 
`strict_range` - - 默认为 true。 - - 对于 Range 分区,当该参数为 true 时,表示要被替换的所有正式分区的范围并集需要和替换的临时分区的范围并集完全相同。当置为 false 时,只需要保证替换后,新的正式分区间的范围不重叠即可。 - - 对于 List 分区,该参数恒为 true。要被替换的所有正式分区的枚举值必须和替换的临时分区枚举值完全相同。 - - 下面举例说明: - - * 示例1 - - 待替换的分区 p1, p2, p3 的范围 (=> 并集): - - ``` - [10, 20), [20, 30), [40, 50) => [10, 30), [40, 50) - ``` - - 替换分区 tp1, tp2 的范围(=> 并集): - - ``` - [10, 30), [40, 45), [45, 50) => [10, 30), [40, 50) - ``` - - 范围并集相同,则可以使用 tp1 和 tp2 替换 p1, p2, p3。 - - * 示例2 - - 待替换的分区 p1 的范围 (=> 并集): - - ``` - [10, 50) => [10, 50) - ``` - - 替换分区 tp1, tp2 的范围(=> 并集): - - ``` - [10, 30), [40, 50) => [10, 30), [40, 50) - ``` - - 范围并集不相同,如果 `strict_range` 为 true,则不可以使用 tp1 和 tp2 替换 p1。如果为 false,且替换后的两个分区范围 `[10, 30), [40, 50)` 和其他正式分区不重叠,则可以替换。 - - * 示例3 - - 待替换的分区 p1, p2 的枚举值(=> 并集): - - ``` - (1, 2, 3), (4, 5, 6) => (1, 2, 3, 4, 5, 6) - ``` - - 替换分区 tp1, tp2, tp3 的枚举值(=> 并集): - - ``` - (1, 2, 3), (4), (5, 6) => (1, 2, 3, 4, 5, 6) - ``` - - 枚举值并集相同,可以使用 tp1,tp2,tp3 替换 p1,p2 - - * 示例4 - - 待替换的分区 p1, p2,p3 的枚举值(=> 并集): - - ``` - (("1","beijing"), ("1", "shanghai")), (("2","beijing"), ("2", "shanghai")), (("3","beijing"), ("3", "shanghai")) => (("1","beijing"), ("1", "shanghai"), ("2","beijing"), ("2", "shanghai"), ("3","beijing"), ("3", "shanghai")) - ``` - - 替换分区 tp1, tp2 的枚举值(=> 并集): - - ``` - (("1","beijing"), ("1", "shanghai")), (("2","beijing"), ("2", "shanghai"), ("3","beijing"), ("3", "shanghai")) => (("1","beijing"), ("1", "shanghai"), ("2","beijing"), ("2", "shanghai"), ("3","beijing"), ("3", "shanghai")) - ``` - - 枚举值并集相同,可以使用 tp1,tp2 替换 p1,p2,p3 - -2. `use_temp_partition_name` - - 默认为 false。当该参数为 false,并且待替换的分区和替换分区的个数相同时,则替换后的正式分区名称维持不变。如果为 true,则替换后,正式分区的名称为替换分区的名称。下面举例说明: - - * 示例1 - - ``` - ALTER TABLE tbl1 REPLACE PARTITION (p1) WITH TEMPORARY PARTITION (tp1); - ``` - - `use_temp_partition_name` 默认为 false,则在替换后,分区的名称依然为 p1,但是相关的数据和属性都替换为 tp1 的。 - - 如果 `use_temp_partition_name` 默认为 true,则在替换后,分区的名称为 tp1。p1 分区不再存在。 - - * 示例2 - - ``` - ALTER TABLE tbl1 REPLACE PARTITION (p1, p2) WITH TEMPORARY PARTITION (tp1); - ``` - - `use_temp_partition_name` 默认为 false,但因为待替换分区的个数和替换分区的个数不同,则该参数无效。替换后,分区名称为 tp1,p1 和 p2 不再存在。 - -替换操作的一些说明: - -* 分区替换成功后,被替换的分区将被删除且不可恢复。 - -## 临时分区的导入和查询 - -用户可以将数据导入到临时分区,也可以指定临时分区进行查询。 - -1. 导入临时分区 - - 根据导入方式的不同,指定导入临时分区的语法稍有差别。这里通过示例进行简单说明 - - ``` - INSERT INTO tbl TEMPORARY PARTITION(tp1, tp2, ...) SELECT .... - ``` - - ``` - curl --location-trusted -u root: -H "label:123" -H "temporary_partitions: tp1, tp2, ..." -T testData http://host:port/api/testDb/testTbl/_stream_load - ``` - - ``` - LOAD LABEL example_db.label1 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - TEMPORARY PARTITION (tp1, tp2, ...) - ... - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - ``` - - ``` - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100), - TEMPORARY PARTITIONS(tp1, tp2, ...), - WHERE k1 > 100 - PROPERTIES - (...) - FROM KAFKA - (...); - ``` - -2. 查询临时分区 - - ``` - SELECT ... FROM - tbl1 TEMPORARY PARTITION(tp1, tp2, ...) - JOIN - tbl2 TEMPORARY PARTITION(tp1, tp2, ...) - ON ... 
- WHERE ...; - ``` - -## 和其他操作的关系 - -### DROP - -* 使用 Drop 操作直接删除数据库或表后,可以通过 Recover 命令恢复数据库或表(限定时间内),但临时分区不会被恢复。 -* 使用 Alter 命令删除正式分区后,可以通过 Recover 命令恢复分区(限定时间内)。操作正式分区和临时分区无关。 -* 使用 Alter 命令删除临时分区后,无法通过 Recover 命令恢复临时分区。 - -### TRUNCATE - -* 使用 Truncate 命令清空表,表的临时分区会被删除,且不可恢复。 -* 使用 Truncate 命令清空正式分区时,不影响临时分区。 -* 不可使用 Truncate 命令清空临时分区。 - -### ALTER - -* 当表存在临时分区时,无法使用 Alter 命令对表进行 Schema Change、Rollup 等变更操作。 -* 当表在进行变更操作时,无法对表添加临时分区。 - - -## 最佳实践 - -1. 原子的覆盖写操作 - - 某些情况下,用户希望能够重写某一分区的数据,但如果采用先删除再导入的方式进行,在中间会有一段时间无法查看数据。这时,用户可以先创建一个对应的临时分区,将新的数据导入到临时分区后,通过替换操作,原子的替换原有分区,以达到目的。对于非分区表的原子覆盖写操作,请参阅[替换表文档](./alter-table-replace-table.md) - -2. 修改分桶数 - - 某些情况下,用户在创建分区时使用了不合适的分桶数。则用户可以先创建一个对应分区范围的临时分区,并指定新的分桶数。然后通过 `INSERT INTO` 命令将正式分区的数据导入到临时分区中,通过替换操作,原子的替换原有分区,以达到目的。 - -3. 合并或分割分区 - - 某些情况下,用户希望对分区的范围进行修改,比如合并两个分区,或将一个大分区分割成多个小分区。则用户可以先建立对应合并或分割后范围的临时分区,然后通过 `INSERT INTO` 命令将正式分区的数据导入到临时分区中,通过替换操作,原子的替换原有分区,以达到目的。 - - - - - - - - - - diff --git a/docs/zh-CN/administrator-guide/backup-restore.md b/docs/zh-CN/administrator-guide/backup-restore.md deleted file mode 100644 index 15f3ff8f59..0000000000 --- a/docs/zh-CN/administrator-guide/backup-restore.md +++ /dev/null @@ -1,193 +0,0 @@ ---- -{ - "title": "备份与恢复", - "language": "zh-CN" -} ---- - - - -# 备份与恢复 - -Doris 支持将当前数据以文件的形式,通过 broker 备份到远端存储系统中。之后可以通过 恢复 命令,从远端存储系统中将数据恢复到任意 Doris 集群。通过这个功能,Doris 可以支持将数据定期的进行快照备份。也可以通过这个功能,在不同集群间进行数据迁移。 - -该功能需要 Doris 版本 0.8.2+ - -使用该功能,需要部署对应远端存储的 broker。如 BOS、HDFS 等。可以通过 `SHOW BROKER;` 查看当前部署的 broker。 - -## 简要原理说明 - -### 备份(Backup) - -备份操作是将指定表或分区的数据,直接以 Doris 存储的文件的形式,上传到远端仓库中进行存储。当用户提交 Backup 请求后,系统内部会做如下操作: - -1. 快照及快照上传 - - 快照阶段会对指定的表或分区数据文件进行快照。之后,备份都是对快照进行操作。在快照之后,对表进行的更改、导入等操作都不再影响备份的结果。快照只是对当前数据文件产生一个硬链,耗时很少。快照完成后,会开始对这些快照文件进行逐一上传。快照上传由各个 Backend 并发完成。 - -2. 元数据准备及上传 - - 数据文件快照上传完成后,Frontend 会首先将对应元数据写成本地文件,然后通过 broker 将本地元数据文件上传到远端仓库。完成最终备份作业 - -2. 动态分区表说明 - - 如果该表是动态分区表,备份之后会自动禁用动态分区属性,在做恢复的时候需要手动将该表的动态分区属性启用,命令如下: - ```sql - ALTER TABLE tbl1 SET ("dynamic_partition.enable"="true") - ``` - -### 恢复(Restore) - -恢复操作需要指定一个远端仓库中已存在的备份,然后将这个备份的内容恢复到本地集群中。当用户提交 Restore 请求后,系统内部会做如下操作: - -1. 在本地创建对应的元数据 - - 这一步首先会在本地集群中,创建恢复对应的表分区等结构。创建完成后,该表可见,但是不可访问。 - -2. 本地snapshot - - 这一步是将上一步创建的表做一个快照。这其实是一个空快照(因为刚创建的表是没有数据的),其目的主要是在 Backend 上产生对应的快照目录,用于之后接收从远端仓库下载的快照文件。 - -3. 下载快照 - - 远端仓库中的快照文件,会被下载到对应的上一步生成的快照目录中。这一步由各个 Backend 并发完成。 - -4. 生效快照 - - 快照下载完成后,我们要将各个快照映射为当前本地表的元数据。然后重新加载这些快照,使之生效,完成最终的恢复作业。 - -## 最佳实践 - -### 备份 - -当前我们支持最小分区(Partition)粒度的全量备份(增量备份有可能在未来版本支持)。如果需要对数据进行定期备份,首先需要在建表时,合理的规划表的分区及分桶,比如按时间进行分区。然后在之后的运行过程中,按照分区粒度进行定期的数据备份。 - -### 数据迁移 - -用户可以先将数据备份到远端仓库,再通过远端仓库将数据恢复到另一个集群,完成数据迁移。因为数据备份是通过快照的形式完成的,所以,在备份作业的快照阶段之后的新的导入数据,是不会备份的。因此,在快照完成后,到恢复作业完成这期间,在原集群上导入的数据,都需要在新集群上同样导入一遍。 - -建议在迁移完成后,对新旧两个集群并行导入一段时间。完成数据和业务正确性校验后,再将业务迁移到新的集群。 - -## 重点说明 - -1. 备份恢复相关的操作目前只允许拥有 ADMIN 权限的用户执行。 -2. 一个 Database 内,只允许有一个正在执行的备份或恢复作业。 -3. 备份和恢复都支持最小分区(Partition)级别的操作,当表的数据量很大时,建议按分区分别执行,以降低失败重试的代价。 -4. 因为备份恢复操作,操作的都是实际的数据文件。所以当一个表的分片过多,或者一个分片有过多的小版本时,可能即使总数据量很小,依然需要备份或恢复很长时间。用户可以通过 `SHOW PARTITIONS FROM table_name;` 和 `SHOW TABLET FROM table_name;` 来查看各个分区的分片数量,以及各个分片的文件版本数量,来预估作业执行时间。文件数量对作业执行的时间影响非常大,所以建议在建表时,合理规划分区分桶,以避免过多的分片。 -5. 当通过 `SHOW BACKUP` 或者 `SHOW RESTORE` 命令查看作业状态时。有可能会在 `TaskErrMsg` 一列中看到错误信息。但只要 `State` 列不为 - `CANCELLED`,则说明作业依然在继续。这些 Task 有可能会重试成功。当然,有些 Task 错误,也会直接导致作业失败。 -6. 
如果恢复作业是一次覆盖操作(指定恢复数据到已经存在的表或分区中),那么从恢复作业的 `COMMIT` 阶段开始,当前集群上被覆盖的数据有可能不能再被还原。此时如果恢复作业失败或被取消,有可能造成之前的数据已损坏且无法访问。这种情况下,只能通过再次执行恢复操作,并等待作业完成。因此,我们建议,如无必要,尽量不要使用覆盖的方式恢复数据,除非确认当前数据已不再使用。 - -## 相关命令 - -和备份恢复功能相关的命令如下。以下命令,都可以通过 mysql-client 连接 Doris 后,使用 `help cmd;` 的方式查看详细帮助。 - -1. CREATE REPOSITORY - - 创建一个远端仓库路径,用于备份或恢复。该命令需要借助 Broker 进程访问远端存储,不同的 Broker 需要提供不同的参数,具体请参阅 [Broker文档](broker.md),也可以直接通过S3 协议备份到支持AWS S3协议的远程存储上去,具体参考 [创建远程仓库文档](../sql-reference/sql-statements/Data%20Definition/CREATE%20REPOSITORY.md) - -1. BACKUP - - 执行一次备份操作。 - -3. SHOW BACKUP - - 查看最近一次 backup 作业的执行情况,包括: - - * JobId:本次备份作业的 id。 - * SnapshotName:用户指定的本次备份作业的名称(Label)。 - * DbName:备份作业对应的 Database。 - * State:备份作业当前所在阶段: - * PENDING:作业初始状态。 - * SNAPSHOTING:正在进行快照操作。 - * UPLOAD_SNAPSHOT:快照结束,准备上传。 - * UPLOADING:正在上传快照。 - * SAVE_META:正在本地生成元数据文件。 - * UPLOAD_INFO:上传元数据文件和本次备份作业的信息。 - * FINISHED:备份完成。 - * CANCELLED:备份失败或被取消。 - * BackupObjs:本次备份涉及的表和分区的清单。 - * CreateTime:作业创建时间。 - * SnapshotFinishedTime:快照完成时间。 - * UploadFinishedTime:快照上传完成时间。 - * FinishedTime:本次作业完成时间。 - * UnfinishedTasks:在 `SNAPSHOTTING`,`UPLOADING` 等阶段,会有多个子任务在同时进行,这里展示的当前阶段,未完成的子任务的 task id。 - * TaskErrMsg:如果有子任务执行出错,这里会显示对应子任务的错误信息。 - * Status:用于记录在整个作业过程中,可能出现的一些状态信息。 - * Timeout:作业的超时时间,单位是秒。 - -4. SHOW SNAPSHOT - - 查看远端仓库中已存在的备份。 - - * Snapshot:备份时指定的该备份的名称(Label)。 - * Timestamp:备份的时间戳。 - * Status:该备份是否正常。 - - 如果在 `SHOW SNAPSHOT` 后指定了 where 子句,则可以显示更详细的备份信息。 - - * Database:备份时对应的 Database。 - * Details:展示了该备份完整的数据目录结构。 - -5. RESTORE - - 执行一次恢复操作。 - -6. SHOW RESTORE - - 查看最近一次 restore 作业的执行情况,包括: - - * JobId:本次恢复作业的 id。 - * Label:用户指定的仓库中备份的名称(Label)。 - * Timestamp:用户指定的仓库中备份的时间戳。 - * DbName:恢复作业对应的 Database。 - * State:恢复作业当前所在阶段: - * PENDING:作业初始状态。 - * SNAPSHOTING:正在进行本地新建表的快照操作。 - * DOWNLOAD:正在发送下载快照任务。 - * DOWNLOADING:快照正在下载。 - * COMMIT:准备生效已下载的快照。 - * COMMITTING:正在生效已下载的快照。 - * FINISHED:恢复完成。 - * CANCELLED:恢复失败或被取消。 - * AllowLoad:恢复期间是否允许导入。 - * ReplicationNum:恢复指定的副本数。 - * RestoreObjs:本次恢复涉及的表和分区的清单。 - * CreateTime:作业创建时间。 - * MetaPreparedTime:本地元数据生成完成时间。 - * SnapshotFinishedTime:本地快照完成时间。 - * DownloadFinishedTime:远端快照下载完成时间。 - * FinishedTime:本次作业完成时间。 - * UnfinishedTasks:在 `SNAPSHOTTING`,`DOWNLOADING`, `COMMITTING` 等阶段,会有多个子任务在同时进行,这里展示的当前阶段,未完成的子任务的 task id。 - * TaskErrMsg:如果有子任务执行出错,这里会显示对应子任务的错误信息。 - * Status:用于记录在整个作业过程中,可能出现的一些状态信息。 - * Timeout:作业的超时时间,单位是秒。 - -7. CANCEL BACKUP - - 取消当前正在执行的备份作业。 - -8. CANCEL RESTORE - - 取消当前正在执行的恢复作业。 - -9. DROP REPOSITORY - - 删除已创建的远端仓库。删除仓库,仅仅是删除该仓库在 Doris 中的映射,不会删除实际的仓库数据。 \ No newline at end of file diff --git a/docs/zh-CN/administrator-guide/block-rule/sql-block.md b/docs/zh-CN/administrator-guide/block-rule/sql-block.md deleted file mode 100644 index 6c34d9a05e..0000000000 --- a/docs/zh-CN/administrator-guide/block-rule/sql-block.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -{ -"title": "SQL黑名单", -"language": "zh-CN" -} ---- - - - -# SQL黑名单 - -该功能仅用于限制查询语句,并且不会限制 explain 语句的执行。 -支持按用户配置SQL黑名单: - -1. 通过正则匹配的方式拒绝指定SQL - -2. 
通过设置partition_num, tablet_num, cardinality, 检查一个查询是否达到其中一个限制 - - partition_num, tablet_num, cardinality 可以一起设置,一旦一个查询达到其中一个限制,查询将会被拦截 - -## 规则 - -支持对SQL规则进行增删改查: -- 创建SQL阻止规则 - - sql:匹配规则(基于正则匹配,特殊字符需要转义),可选,默认值为 "NULL" - - sqlHash: sql hash值,用于完全匹配,我们会在`fe.audit.log`打印这个值,可选,这个参数和sql只能二选一,默认值为 "NULL" - - partition_num: 一个扫描节点会扫描的最大partition数量,默认值为0L - - tablet_num: 一个扫描节点会扫描的最大tablet数量,默认值为0L - - cardinality: 一个扫描节点粗略的扫描行数,默认值为0L - - global:是否全局(所有用户)生效,默认为false - - enable:是否开启阻止规则,默认为true -```sql -CREATE SQL_BLOCK_RULE test_rule -PROPERTIES( - "sql"="select * from order_analysis", - "global"="false", - "enable"="true", - "sqlHash"="" -) -``` -当我们执行刚才定义在规则里的sql时就会返回异常错误,示例如下: -```sql -mysql> select * from order_analysis; -ERROR 1064 (HY000): errCode = 2, detailMessage = sql match regex sql block rule: order_analysis_rule -``` - -- 创建 test_rule2,将最大扫描的分区数量限制在30个,最大扫描基数限制在100亿行,示例如下: -```sql -CREATE SQL_BLOCK_RULE test_rule2 PROPERTIES("partition_num" = "30", "cardinality"="10000000000","global"="false","enable"="true") -``` - -- 查看已配置的SQL阻止规则,不指定规则名则为查看所有规则 - -```sql -SHOW SQL_BLOCK_RULE [FOR RULE_NAME] -``` -- 修改SQL阻止规则,允许对sql/sqlHash/partition_num/tablet_num/cardinality/global/enable等每一项进行修改 - - sql 和 sqlHash 不能同时被设置。这意味着,如果一个rule设置了sql或者sqlHash,则另一个属性将无法被修改 - - sql/sqlHash 和 partition_num/tablet_num/cardinality 不能同时被设置。举个例子,如果一个rule设置了partition_num,那么sql或者sqlHash将无法被修改 -```sql -ALTER SQL_BLOCK_RULE test_rule PROPERTIES("sql"="select \\* from test_table","enable"="true") -``` - -``` -ALTER SQL_BLOCK_RULE test_rule2 PROPERTIES("partition_num" = "10","tablet_num"="300","enable"="true") -``` - -- 删除SQL阻止规则,支持多规则,以`,`隔开 -``` -DROP SQL_BLOCK_RULE test_rule1,test_rule2 -``` - -## 用户规则绑定 -如果配置global=false,则需要配置指定用户的规则绑定,多个规则使用`,`分隔 -```sql -SET PROPERTY [FOR 'jack'] 'sql_block_rules' = 'test_rule1,test_rule2' -``` diff --git a/docs/zh-CN/administrator-guide/bloomfilter.md b/docs/zh-CN/administrator-guide/bloomfilter.md deleted file mode 100644 index 235f34f5e2..0000000000 --- a/docs/zh-CN/administrator-guide/bloomfilter.md +++ /dev/null @@ -1,133 +0,0 @@ ---- -{ - "title": "BloomFilter索引", - "language": "zh-CN" -} ---- - - - -# BloomFilter索引 - -BloomFilter是由Bloom在1970年提出的一种多哈希函数映射的快速查找算法。通常应用在一些需要快速判断某个元素是否属于集合,但是并不严格要求100%正确的场合,BloomFilter有以下特点: - -- 空间效率高的概率型数据结构,用来检查一个元素是否在一个集合中。 -- 对于一个元素检测是否存在的调用,BloomFilter会告诉调用者两个结果之一:可能存在或者一定不存在。 -- 缺点是存在误判,告诉你可能存在,不一定真实存在。 - -布隆过滤器实际上是由一个超长的二进制位数组和一系列的哈希函数组成。二进制位数组初始全部为0,当给定一个待查询的元素时,这个元素会被一系列哈希函数计算映射出一系列的值,所有的值在位数组的偏移量处置为1。 - -下图展示了一个 m=18, k=3 (m是该Bit数组的大小,k是Hash函数的个数)的Bloom Filter示例。集合中的 x、y、z 三个元素通过 3 个不同的哈希函数散列到位数组中。当查询元素w时,通过Hash函数计算之后因为有一个比特为0,因此w不在该集合中。 - -![Bloom_filter.svg](/images/Bloom_filter.svg.png) - -那么怎么判断某个元素是否在集合中呢?同样是这个元素经过哈希函数计算后得到所有的偏移位置,若这些位置全都为1,则判断这个元素在这个集合中,若有一个不为1,则判断这个元素不在这个集合中。就是这么简单!
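-
-为了更直观地理解上面的判断过程,下面给出一个极简的 Python 示意实现(仅为配合上文原理说明的示例草图,并非 Doris 内部实现;其中位数组大小 m、哈希函数个数 k,以及用加盐哈希模拟多个独立哈希函数的做法,都只是示例假设):
-
-```python
-import hashlib
-
-class SimpleBloomFilter:
-    """示意用的布隆过滤器:m 位的位数组加 k 个哈希函数(仅为示例,非 Doris 实现)。"""
-
-    def __init__(self, m=18, k=3):
-        self.m = m
-        self.k = k
-        self.bits = [0] * m
-
-    def _positions(self, value):
-        # 给同一个值加上不同的前缀,模拟 k 个相互独立的哈希函数
-        for i in range(self.k):
-            digest = hashlib.md5(f"{i}:{value}".encode()).hexdigest()
-            yield int(digest, 16) % self.m
-
-    def add(self, value):
-        # 插入元素:把 k 个哈希位置全部置为 1
-        for pos in self._positions(value):
-            self.bits[pos] = 1
-
-    def might_contain(self, value):
-        # 所有位置都为 1:可能存在(存在误判);任意一位为 0:一定不存在
-        return all(self.bits[pos] == 1 for pos in self._positions(value))
-
-bf = SimpleBloomFilter(m=18, k=3)
-for v in ("x", "y", "z"):
-    bf.add(v)
-
-print(bf.might_contain("x"))   # True,含义是“可能存在”
-print(bf.might_contain("w"))   # 大概率为 False,含义是“一定不存在”
-```
-
-可以看到,只有当一个元素对应的所有位都为 1 时才会返回“可能存在”,而任意一位为 0 就可以确定“一定不存在”,这与上文图示的判断方式一致。
-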
- -## Doris BloomFilter索引及使用使用场景 - -我们在使用HBase的时候,知道Hbase数据块索引提供了一个有效的方法,在访问一个特定的行时用来查找应该读取的HFile的数据块。但是它的效用是有限的。HFile数据块的默认大小是64KB,这个大小不能调整太多。 - -如果你要查找一个短行,只在整个数据块的起始行键上建立索引无法给你细粒度的索引信息。例如,如果你的行占用100字节存储空间,一个64KB的数据块包含(64 * 1024)/100 = 655.53 = ~700行,而你只能把起始行放在索引位上。你要查找的行可能落在特定数据块上的行区间里,但也不是肯定存放在那个数据块上。这有多种情况的可能,或者该行在表里不存在,或者存放在另一个HFile里,甚至在MemStore里。这些情况下,从硬盘读取数据块会带来IO开销,也会滥用数据块缓存。这会影响性能,尤其是当你面对一个巨大的数据集并且有很多并发读用户时。 - -所以HBase提供了布隆过滤器允许你对存储在每个数据块的数据做一个反向测试。当某行被请求时,先检查布隆过滤器看看该行是否不在这个数据块。布隆过滤器要么确定回答该行不在,要么回答它不知道。这就是为什么我们称它是反向测试。布隆过滤器也可以应用到行里的单元上。当访问某列标识符时先使用同样的反向测试。 - -布隆过滤器也不是没有代价。存储这个额外的索引层次占用额外的空间。布隆过滤器随着它们的索引对象数据增长而增长,所以行级布隆过滤器比列标识符级布隆过滤器占用空间要少。当空间不是问题时,它们可以帮助你榨干系统的性能潜力。 - -Doris的BloomFilter索引是从通过建表的时候指定,或者通过表的ALTER操作来完成。Bloom Filter本质上是一种位图结构,用于快速的判断一个给定的值是否在一个集合中。这种判断会产生小概率的误判。即如果返回false,则一定不在这个集合内。而如果范围true,则有可能在这个集合内。 - -BloomFilter索引也是以Block为粒度创建的。每个Block中,指定列的值作为一个集合生成一个BloomFilter索引条目,用于在查询是快速过滤不满足条件的数据。 - -下面我们通过实例来看看Doris怎么创建BloomFilter索引。 - -### 创建BloomFilter索引 - -Doris BloomFilter索引的创建是通过在建表语句的PROPERTIES里加上"bloom_filter_columns"="k1,k2,k3",这个属性,k1,k2,k3是你要创建的BloomFilter索引的Key列名称,例如下面我们对表里的saler_id,category_id创建了BloomFilter索引。 - -```sql -CREATE TABLE IF NOT EXISTS sale_detail_bloom ( - sale_date date NOT NULL COMMENT "销售时间", - customer_id int NOT NULL COMMENT "客户编号", - saler_id int NOT NULL COMMENT "销售员", - sku_id int NOT NULL COMMENT "商品编号", - category_id int NOT NULL COMMENT "商品分类", - sale_count int NOT NULL COMMENT "销售数量", - sale_price DECIMAL(12,2) NOT NULL COMMENT "单价", - sale_amt DECIMAL(20,2) COMMENT "销售总金额" -) -Duplicate KEY(sale_date, customer_id,saler_id,sku_id,category_id) -PARTITION BY RANGE(sale_date) -( -PARTITION P_202111 VALUES [('2021-11-01'), ('2021-12-01')) -) -DISTRIBUTED BY HASH(saler_id) BUCKETS 10 -PROPERTIES ( -"replication_num" = "3", -"bloom_filter_columns"="saler_id,category_id", -"dynamic_partition.enable" = "true", -"dynamic_partition.time_unit" = "MONTH", -"dynamic_partition.time_zone" = "Asia/Shanghai", -"dynamic_partition.start" = "-2147483648", -"dynamic_partition.end" = "2", -"dynamic_partition.prefix" = "P_", -"dynamic_partition.replication_num" = "3", -"dynamic_partition.buckets" = "3" -); -``` - -### 查看BloomFilter索引 - -查看我们在表上建立的BloomFilter索引是使用: - -``` -SHOW CREATE TABLE -``` - -### 删除BloomFilter索引 - -删除索引即为将索引列从bloom_filter_columns属性中移除: - -``` -ALTER TABLE SET ("bloom_filter_columns" = ""); -``` - -### 修改BloomFilter索引 - -修改索引即为修改表的bloom_filter_columns属性: - -``` -ALTER TABLE SET ("bloom_filter_columns" = "k1,k3"); -``` - -### **Doris BloomFilter使用场景** - -满足以下几个条件时可以考虑对某列建立Bloom Filter 索引: - -1. 首先BloomFilter适用于非前缀过滤. - -2. 查询会根据该列高频过滤,而且查询条件大多是in和 = 过滤. - -3. 不同于Bitmap, BloomFilter适用于高基数列。比如UserID。因为如果创建在低基数的列上,比如”性别“列,则每个Block几乎都会包含所有取值,导致BloomFilter索引失去意义 - -### **Doris BloomFilter使用注意事项** - -1. 不支持对Tinyint、Float、Double 类型的列建Bloom Filter索引。 - -2. Bloom Filter索引只对in和 = 过滤查询有加速效果。 -3. 
如果要查看某个查询是否命中了Bloom Filter索引,可以通过查询的Profile信息查看 diff --git a/docs/zh-CN/administrator-guide/broker.md b/docs/zh-CN/administrator-guide/broker.md deleted file mode 100644 index 40cdab9948..0000000000 --- a/docs/zh-CN/administrator-guide/broker.md +++ /dev/null @@ -1,282 +0,0 @@ ---- -{ - "title": "Broker", - "language": "zh-CN" -} ---- - - - -# Broker - -Broker 是 Doris 集群中一种可选进程,主要用于支持 Doris 读写远端存储上的文件和目录,如 HDFS、BOS 和 AFS 等。 - -Broker 通过提供一个 RPC 服务端口来提供服务,是一个无状态的 Java 进程,负责为远端存储的读写操作封装一些类 POSIX 的文件操作,如 open,pread,pwrite 等等。除此之外,Broker 不记录任何其他信息,所以包括远端存储的连接信息、文件信息、权限信息等等,都需要通过参数在 RPC 调用中传递给 Broker 进程,才能使得 Broker 能够正确读写文件。 - -Broker 仅作为一个数据通路,并不参与任何计算,因此仅需占用较少的内存。通常一个 Doris 系统中会部署一个或多个 Broker 进程。并且相同类型的 Broker 会组成一个组,并设定一个 **名称(Broker name)**。 - -Broker 在 Doris 系统架构中的位置如下: - -``` -+----+ +----+ -| FE | | BE | -+-^--+ +--^-+ - | | - | | -+-v---------v-+ -| Broker | -+------^------+ - | - | -+------v------+ -|HDFS/BOS/AFS | -+-------------+ -``` - -本文档主要介绍 Broker 在访问不同远端存储时需要的参数,如连接信息、权限认证信息等等。 - -## 支持的存储系统 - -不同的 Broker 类型支持不同的存储系统。 - -1. 社区版 HDFS - - * 支持简单认证访问 - * 支持通过 kerberos 认证访问 - * 支持 HDFS HA 模式访问 - -2. 百度 HDFS/AFS(开源版本不支持) - - * 支持通过 ugi 简单认证访问 - -3. 百度对象存储 BOS(开源版本不支持) - - * 支持通过 AK/SK 认证访问 - -## 需要 Broker 的操作 - -1. Broker Load - - Broker Load 功能通过 Broker 进程读取远端存储上的文件数据并导入到 Doris 中。示例如下: - - ``` - LOAD LABEL example_db.label6 - ( - DATA INFILE("bos://my_bucket/input/file") - INTO TABLE `my_table` - ) - WITH BROKER "broker_name" - ( - "bos_endpoint" = "http://bj.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyy" - ) - ``` - - 其中 `WITH BROKER` 以及之后的 Property Map 用于提供 Broker 相关信息。 - -2. 数据导出(Export) - - Export 功能通过 Broker 进程,将 Doris 中存储的数据以文本的格式导出到远端存储的文件中。示例如下: - - ``` - EXPORT TABLE testTbl - TO "hdfs://hdfs_host:port/a/b/c" - WITH BROKER "broker_name" - ( - "username" = "xxx", - "password" = "yyy" - ); - ``` - - 其中 `WITH BROKER` 以及之后的 Property Map 用于提供 Broker 相关信息。 - -3. 创建用于备份恢复的仓库(Create Repository) - - 当用户需要使用备份恢复功能时,需要先通过 `CREATE REPOSITORY` 命令创建一个 “仓库”,仓库元信息中记录了所使用的 Broker 以及相关信息。之后的备份恢复操作,会通过 Broker 将数据备份到这个仓库,或从这个仓库读取数据恢复到 Doris 中。示例如下: - - ``` - CREATE REPOSITORY `bos_repo` - WITH BROKER `broker_name` - ON LOCATION "bos://doris_backup" - PROPERTIES - ( - "bos_endpoint" = "http://gz.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyy" - ); - ``` - - 其中 `WITH BROKER` 以及之后的 Property Map 用于提供 Broker 相关信息。 - - -## Broker 信息 - -Broker 的信息包括 **名称(Broker name)** 和 **认证信息** 两部分。通常的语法格式如下: - -``` -WITH BROKER "broker_name" -( - "username" = "xxx", - "password" = "yyy", - "other_prop" = "prop_value", - ... -); -``` - -### 名称 - -通常用户需要通过操作命令中的 `WITH BROKER "broker_name"` 子句来指定一个已经存在的 Broker Name。Broker Name 是用户在通过 `ALTER SYSTEM ADD BROKER` 命令添加 Broker 进程时指定的一个名称。一个名称通常对应一个或多个 Broker 进程。Doris 会根据名称选择可用的 Broker 进程。用户可以通过 `SHOW BROKER` 命令查看当前集群中已经存在的 Broker。 - -**注:Broker Name 只是一个用户自定义名称,不代表 Broker 的类型。** - -### 认证信息 - -不同的 Broker 类型,以及不同的访问方式需要提供不同的认证信息。认证信息通常在 `WITH BROKER "broker_name"` 之后的 Property Map 中以 Key-Value 的方式提供。 - -#### 社区版 HDFS - -1. 简单认证 - - 简单认证即 Hadoop 配置 `hadoop.security.authentication` 为 `simple`。 - - 使用系统用户访问 HDFS。或者在 Broker 启动的环境变量中添加:```HADOOP_USER_NAME```。 - - ``` - ( - "username" = "user", - "password" = "" - ); - ``` - - 密码置空即可。 - -2. 
Kerberos 认证 - - 该认证方式需提供以下信息: - - * `hadoop.security.authentication`:指定认证方式为 kerberos。 - * `kerberos_principal`:指定 kerberos 的 principal。 - * `kerberos_keytab`:指定 kerberos 的 keytab 文件路径。该文件必须为 Broker 进程所在服务器上的文件的绝对路径。并且可以被 Broker 进程访问。 - * `kerberos_keytab_content`:指定 kerberos 中 keytab 文件内容经过 base64 编码之后的内容。这个跟 `kerberos_keytab` 配置二选一即可。 - - 示例如下: - - ``` - ( - "hadoop.security.authentication" = "kerberos", - "kerberos_principal" = "doris@YOUR.COM", - "kerberos_keytab" = "/home/doris/my.keytab" - ) - ``` - ``` - ( - "hadoop.security.authentication" = "kerberos", - "kerberos_principal" = "doris@YOUR.COM", - "kerberos_keytab_content" = "ASDOWHDLAWIDJHWLDKSALDJSDIWALD" - ) - ``` - 如果采用Kerberos认证方式,则部署Broker进程的时候需要[krb5.conf](https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html)文件, - krb5.conf文件包含Kerberos的配置信息,通常,您应该将krb5.conf文件安装在目录/etc中。您可以通过设置环境变量KRB5_CONFIG覆盖默认位置。 - krb5.conf文件的内容示例如下: - ``` - [libdefaults] - default_realm = DORIS.HADOOP - default_tkt_enctypes = des3-hmac-sha1 des-cbc-crc - default_tgs_enctypes = des3-hmac-sha1 des-cbc-crc - dns_lookup_kdc = true - dns_lookup_realm = false - - [realms] - DORIS.HADOOP = { - kdc = kerberos-doris.hadoop.service:7005 - } - ``` - -3. HDFS HA 模式 - - 这个配置用于访问以 HA 模式部署的 HDFS 集群。 - - * `dfs.nameservices`:指定 hdfs 服务的名字,自定义,如:"dfs.nameservices" = "my_ha"。 - * `dfs.ha.namenodes.xxx`:自定义 namenode 的名字,多个名字以逗号分隔。其中 xxx 为 `dfs.nameservices` 中自定义的名字,如: "dfs.ha.namenodes.my_ha" = "my_nn"。 - * `dfs.namenode.rpc-address.xxx.nn`:指定 namenode 的rpc地址信息。其中 nn 表示 `dfs.ha.namenodes.xxx` 中配置的 namenode 的名字,如:"dfs.namenode.rpc-address.my_ha.my_nn" = "host:port"。 - * `dfs.client.failover.proxy.provider`:指定 client 连接 namenode 的 provider,默认为:org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider。 - - 示例如下: - - ``` - ( - "dfs.nameservices" = "my_ha", - "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2", - "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port", - "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port", - "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ) - ``` - - HA 模式可以和前面两种认证方式组合,进行集群访问。如通过简单认证访问 HA HDFS: - - ``` - ( - "username"="user", - "password"="passwd", - "dfs.nameservices" = "my_ha", - "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2", - "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port", - "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port", - "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ) - ``` - 关于HDFS集群的配置可以写入hdfs-site.xml文件中,用户使用Broker进程读取HDFS集群的信息时,只需要填写集群的文件路径名和认证信息即可。 - -#### 百度对象存储 BOS - -**(开源版本不支持)** - -1. 
通过 AK/SK 访问 - - * AK/SK:Access Key 和 Secret Key。在百度云安全认证中心可以查看用户的 AK/SK。 - * Region Endpoint:BOS 所在地区的 Endpoint。 - * BOS支持的区域及对应 Endpoint 请查看[获取访问域名](https://cloud.baidu.com/doc/BOS/s/Ck1rk80hn#%E8%8E%B7%E5%8F%96%E8%AE%BF%E9%97%AE%E5%9F%9F%E5%90%8D) - - 示例如下: - - ``` - ( - "bos_endpoint" = "http://bj.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey" = "yyyyyyyyyyyyyyyyyyyyyyyyyy" - ) - ``` - -#### 百度 HDFS/AFS - -**(开源版本不支持)** - -百度 AFS 和 HDFS 仅支持使用 ugi 的简单认证访问。示例如下: - -``` -( - "username" = "user", - "password" = "passwd" -); -``` - -其中 user 和 passwd 为 Hadoop 的 UGI 配置。 diff --git a/docs/zh-CN/administrator-guide/bucket-shuffle-join.md b/docs/zh-CN/administrator-guide/bucket-shuffle-join.md deleted file mode 100644 index 67ac4a20af..0000000000 --- a/docs/zh-CN/administrator-guide/bucket-shuffle-join.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -{ - "title": "Bucket Shuffle Join", - "language": "zh-CN" -} ---- - - - -# Bucket Shuffle Join - -Bucket Shuffle Join 是在 Doris 0.14 版本中正式加入的新功能。旨在为某些 Join 查询提供本地性优化,来减少数据在节点间的传输耗时,来加速查询。 - -它的设计、实现和效果可以参阅 [ISSUE 4394](https://github.com/apache/incubator-doris/issues/4394)。 - -## 名词解释 - -* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。 -* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。 -* 左表:Join查询时,左边的表。进行Probe操作。可被Join Reorder调整顺序。 -* 右表:Join查询时,右边的表。进行Build操作。可被Join Reorder调整顺序。 - -## 原理 -Doris支持的常规分布式Join方式包括了shuffle join 和broadcast join。这两种join都会导致不小的网络开销: - -举个例子,当前存在A表与B表的Join查询,它的Join方式为HashJoin,不同Join类型的开销如下: -* **Broadcast Join**: 如果根据数据分布,查询规划出A表有3个执行的HashJoinNode,那么需要将B表全量的发送到3个HashJoinNode,那么它的网络开销是`3B`,它的内存开销也是`3B`。 -* **Shuffle Join**: Shuffle Join会将A,B两张表的数据根据哈希计算分散到集群的节点之中,所以它的网络开销为 ```A + B```,内存开销为`B`。 - -在FE之中保存了Doris每个表的数据分布信息,如果join语句命中了表的数据分布列,我们应该使用数据分布信息来减少join语句的网络与内存开销,这就是Bucket Shuffle Join的思路来源。 - -![image.png](/images/bucket_shuffle_join.png) - -上面的图片展示了Bucket Shuffle Join的工作原理。SQL语句为 A表 join B表,并且join的等值表达式命中了A的数据分布列。而Bucket Shuffle Join会根据A表的数据分布信息,将B表的数据发送到对应的A表的数据存储计算节点。Bucket Shuffle Join开销如下: - -* 网络开销: ``` B < min(3B, A + B) ``` - -* 内存开销: ``` B <= min(3B, B) ``` - -可见,相比于Broadcast Join与Shuffle Join, Bucket Shuffle Join有着较为明显的性能优势。减少数据在节点间的传输耗时和Join时的内存开销。相对于Doris原有的Join方式,它有着下面的优点 - -* 首先,Bucket-Shuffle-Join降低了网络与内存开销,使一些Join查询具有了更好的性能。尤其是当FE能够执行左表的分区裁剪与桶裁剪时。 -* 其次,同时与Colocate Join不同,它对于表的数据分布方式并没有侵入性,这对于用户来说是透明的。对于表的数据分布没有强制性的要求,不容易导致数据倾斜的问题。 -* 最后,它可以为Join Reorder提供更多可能的优化空间。 - -## 使用方式 - -### 设置Session变量 - -将session变量`enable_bucket_shuffle_join`设置为`true`,则FE在进行查询规划时就会默认将能够转换为Bucket Shuffle Join的查询自动规划为Bucket Shuffle Join。 - -``` -set enable_bucket_shuffle_join = true; -``` - -在FE进行分布式查询规划时,优先选择的顺序为 Colocate Join -> Bucket Shuffle Join -> Broadcast Join -> Shuffle Join。但是如果用户显式hint了Join的类型,如: - -``` -select * from test join [shuffle] baseall on test.k1 = baseall.k1; -``` - -则上述的选择优先顺序则不生效。 - -该session变量在0.14版本默认为`true`, 而0.13版本需要手动设置为`true`。 - -### 查看Join的类型 - -可以通过`explain`命令来查看Join是否为Bucket Shuffle Join: - -``` -| 2:HASH JOIN | -| | join op: INNER JOIN (BUCKET_SHUFFLE) | -| | hash predicates: | -| | colocate: false, reason: table not in the same group | -| | equal join conjunct: `test`.`k1` = `baseall`.`k1` -``` - -在Join类型之中会指明使用的Join方式为:`BUCKET_SHUFFLE`。 - -## Bucket Shuffle Join的规划规则 - -在绝大多数场景之中,用户只需要默认打开session变量的开关就可以透明的使用这种Join方式带来的性能提升,但是如果了解Bucket Shuffle Join的规划规则,可以帮助我们利用它写出更加高效的SQL。 - -* Bucket Shuffle Join只生效于Join条件为等值的场景,原因与Colocate Join类似,它们都依赖hash来计算确定的数据分布。 -* 在等值Join条件之中包含两张表的分桶列,当左表的分桶列为等值的Join条件时,它有很大概率会被规划为Bucket Shuffle Join。 -* 由于不同的数据类型的hash值计算结果不同,所以Bucket Shuffle 
Join要求左表的分桶列的类型与右表等值join列的类型需要保持一致,否则无法进行对应的规划。 -* Bucket Shuffle Join只作用于Doris原生的OLAP表,对于ODBC,MySQL,ES等外表,当其作为左表时是无法规划生效的。 -* 对于分区表,由于每一个分区的数据分布规则可能不同,所以Bucket Shuffle Join只能保证左表为单分区时生效。所以在SQL执行之中,需要尽量使用`where`条件使分区裁剪的策略能够生效。 -* 假如左表为Colocate的表,那么它每个分区的数据分布规则是确定的,Bucket Shuffle Join能在Colocate表上表现更好。 diff --git a/docs/zh-CN/administrator-guide/colocation-join.md b/docs/zh-CN/administrator-guide/colocation-join.md deleted file mode 100644 index 18051b950a..0000000000 --- a/docs/zh-CN/administrator-guide/colocation-join.md +++ /dev/null @@ -1,409 +0,0 @@ ---- -{ - "title": "Colocation Join", - "language": "zh-CN" -} ---- - - - -# Colocation Join - -Colocation Join 是在 Doris 0.9 版本中引入的新功能。旨在为某些 Join 查询提供本地性优化,来减少数据在节点间的传输耗时,加速查询。 - -最初的设计、实现和效果可以参阅 [ISSUE 245](https://github.com/apache/incubator-doris/issues/245)。 - -Colocation Join 功能经过一次改版,设计和使用方式和最初设计稍有不同。本文档主要介绍 Colocation Join 的原理、实现、使用方式和注意事项。 - -## 名词解释 - -* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。 -* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。 -* Colocation Group(CG):一个 CG 中会包含一张及以上的 Table。在同一个 Group 内的 Table 有着相同的 Colocation Group Schema,并且有着相同的数据分片分布。 -* Colocation Group Schema(CGS):用于描述一个 CG 中的 Table,和 Colocation 相关的通用 Schema 信息。包括分桶列类型,分桶数以及副本数等。 - -## 原理 - -Colocation Join 功能,是将一组拥有相同 CGS 的 Table 组成一个 CG。并保证这些 Table 对应的数据分片会落在同一个 BE 节点上。使得当 CG 内的表进行分桶列上的 Join 操作时,可以通过直接进行本地数据 Join,减少数据在节点间的传输耗时。 - -一个表的数据,最终会根据分桶列值 Hash、对桶数取模的后落在某一个分桶内。假设一个 Table 的分桶数为 8,则共有 `[0, 1, 2, 3, 4, 5, 6, 7]` 8 个分桶(Bucket),我们称这样一个序列为一个 `BucketsSequence`。每个 Bucket 内会有一个或多个数据分片(Tablet)。当表为单分区表时,一个 Bucket 内仅有一个 Tablet。如果是多分区表,则会有多个。 - -为了使得 Table 能够有相同的数据分布,同一 CG 内的 Table 必须保证以下属性相同: - -1. 分桶列和分桶数 - - 分桶列,即在建表语句中 `DISTRIBUTED BY HASH(col1, col2, ...)` 中指定的列。分桶列决定了一张表的数据通过哪些列的值进行 Hash 划分到不同的 Tablet 中。同一 CG 内的 Table 必须保证分桶列的类型和数量完全一致,并且桶数一致,才能保证多张表的数据分片能够一一对应的进行分布控制。 - -2. 
副本数 - - 同一个 CG 内所有表的所有分区(Partition)的副本数必须一致。如果不一致,可能出现某一个 Tablet 的某一个副本,在同一个 BE 上没有其他的表分片的副本对应。 - -同一个 CG 内的表,分区的个数、范围以及分区列的类型不要求一致。 - -在固定了分桶列和分桶数后,同一个 CG 内的表会拥有相同的 BucketsSequence。而副本数决定了每个分桶内的 Tablet 的多个副本,存放在哪些 BE 上。假设 BucketsSequence 为 `[0, 1, 2, 3, 4, 5, 6, 7]`,BE 节点有 `[A, B, C, D]` 4个。则一个可能的数据分布如下: - -``` -+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+ -| 0 | | 1 | | 2 | | 3 | | 4 | | 5 | | 6 | | 7 | -+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+ -| A | | B | | C | | D | | A | | B | | C | | D | -| | | | | | | | | | | | | | | | -| B | | C | | D | | A | | B | | C | | D | | A | -| | | | | | | | | | | | | | | | -| C | | D | | A | | B | | C | | D | | A | | B | -+---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+ -``` - -CG 内所有表的数据都会按照上面的规则进行统一分布,这样就保证了,分桶列值相同的数据都在同一个 BE 节点上,可以进行本地数据 Join。 - -## 使用方式 - -### 建表 - -建表时,可以在 `PROPERTIES` 中指定属性 `"colocate_with" = "group_name"`,表示这个表是一个 Colocation Join 表,并且归属于一个指定的 Colocation Group。 - -示例: - -``` -CREATE TABLE tbl (k1 int, v1 int sum) -DISTRIBUTED BY HASH(k1) -BUCKETS 8 -PROPERTIES( - "colocate_with" = "group1" -); -``` - -如果指定的 Group 不存在,则 Doris 会自动创建一个只包含当前这张表的 Group。如果 Group 已存在,则 Doris 会检查当前表是否满足 Colocation Group Schema。如果满足,则会创建该表,并将该表加入 Group。同时,表会根据已存在的 Group 中的数据分布规则创建分片和副本。 -Group 归属于一个 Database,Group 的名字在一个 Database 内唯一。在内部存储是 Group 的全名为 `dbId_groupName`,但用户只感知 groupName。 - -### 删表 - -当 Group 中最后一张表彻底删除后(彻底删除是指从回收站中删除。通常,一张表通过 `DROP TABLE` 命令删除后,会在回收站默认停留一天的时间后,再删除),该 Group 也会被自动删除。 - -### 查看 Group - -以下命令可以查看集群内已存在的 Group 信息。 - -``` -SHOW PROC '/colocation_group'; - -+-------------+--------------+--------------+------------+----------------+----------+----------+ -| GroupId | GroupName | TableIds | BucketsNum | ReplicationNum | DistCols | IsStable | -+-------------+--------------+--------------+------------+----------------+----------+----------+ -| 10005.10008 | 10005_group1 | 10007, 10040 | 10 | 3 | int(11) | true | -+-------------+--------------+--------------+------------+----------------+----------+----------+ -``` - -* GroupId: 一个 Group 的全集群唯一标识,前半部分为 db id,后半部分为 group id。 -* GroupName: Group 的全名。 -* TabletIds: 该 Group 包含的 Table 的 id 列表。 -* BucketsNum: 分桶数。 -* ReplicationNum: 副本数。 -* DistCols: Distribution columns,即分桶列类型。 -* IsStable: 该 Group 是否稳定(稳定的定义,见 `Colocation 副本均衡和修复` 一节)。 - -通过以下命令可以进一步查看一个 Group 的数据分布情况: - -``` -SHOW PROC '/colocation_group/10005.10008'; - -+-------------+---------------------+ -| BucketIndex | BackendIds | -+-------------+---------------------+ -| 0 | 10004, 10002, 10001 | -| 1 | 10003, 10002, 10004 | -| 2 | 10002, 10004, 10001 | -| 3 | 10003, 10002, 10004 | -| 4 | 10002, 10004, 10003 | -| 5 | 10003, 10002, 10001 | -| 6 | 10003, 10004, 10001 | -| 7 | 10003, 10004, 10002 | -+-------------+---------------------+ -``` - -* BucketIndex: 分桶序列的下标。 -* BackendIds: 分桶中数据分片所在的 BE 节点 id 列表。 - -> 以上命令需要 ADMIN 权限。暂不支持普通用户查看。 - -### 修改表 Colocate Group 属性 - -可以对一个已经创建的表,修改其 Colocation Group 属性。示例: - -`ALTER TABLE tbl SET ("colocate_with" = "group2");` - -* 如果该表之前没有指定过 Group,则该命令检查 Schema,并将该表加入到该 Group(Group 不存在则会创建)。 -* 如果该表之前有指定其他 Group,则该命令会先将该表从原有 Group 中移除,并加入新 Group(Group 不存在则会创建)。 - -也可以通过以下命令,删除一个表的 Colocation 属性: - -`ALTER TABLE tbl SET ("colocate_with" = "");` - -### 其他相关操作 - -当对一个具有 Colocation 属性的表进行增加分区(ADD PARTITION)、修改副本数时,Doris 会检查修改是否会违反 Colocation Group Schema,如果违反则会拒绝。 - -## Colocation 副本均衡和修复 - -Colocation 表的副本分布需要遵循 Group 中指定的分布,所以在副本修复和均衡方面和普通分片有所区别。 - -Group 自身有一个 Stable 属性,当 Stable 为 true 时,表示当前 Group 内的表的所有分片没有正在进行变动,Colocation 特性可以正常使用。当 Stable 为 false 时(Unstable),表示当前 Group 
内有部分表的分片正在做修复或迁移,此时,相关表的 Colocation Join 将退化为普通 Join。 - -### 副本修复 - -副本只能存储在指定的 BE 节点上。所以当某个 BE 不可用时(宕机、Decommission 等),需要寻找一个新的 BE 进行替换。Doris 会优先寻找负载最低的 BE 进行替换。替换后,该 Bucket 内的所有在旧 BE 上的数据分片都要做修复。迁移过程中,Group 被标记为 Unstable。 - -### 副本均衡 - -Doris 会尽力将 Colocation 表的分片均匀分布在所有 BE 节点上。对于普通表的副本均衡,是以单副本为粒度的,即单独为每一个副本寻找负载较低的 BE 节点即可。而 Colocation 表的均衡是 Bucket 级别的,即一个 Bucket 内的所有副本都会一起迁移。我们采用一个简单的均衡算法,即在不考虑副本实际大小,而只根据副本数量,将 BucketsSequence 均匀的分布在所有 BE 上。具体算法可以参阅 `ColocateTableBalancer.java` 中的代码注释。 - -> 注1:当前的 Colocation 副本均衡和修复算法,对于异构部署的 Doris 集群效果可能不佳。所谓异构部署,即 BE 节点的磁盘容量、数量、磁盘类型(SSD 和 HDD)不一致。在异构部署情况下,可能出现小容量的 BE 节点和大容量的 BE 节点存储了相同的副本数量。 -> -> 注2:当一个 Group 处于 Unstable 状态时,其中的表的 Join 将退化为普通 Join。此时可能会极大降低集群的查询性能。如果不希望系统自动均衡,可以设置 FE 的配置项 `disable_colocate_balance` 来禁止自动均衡。然后在合适的时间打开即可。(具体参阅 `高级操作` 一节) - -## 查询 - -对 Colocation 表的查询方式和普通表一样,用户无需感知 Colocation 属性。如果 Colocation 表所在的 Group 处于 Unstable 状态,将自动退化为普通 Join。 - -举例说明: - -表1: - -``` -CREATE TABLE `tbl1` ( - `k1` date NOT NULL COMMENT "", - `k2` int(11) NOT NULL COMMENT "", - `v1` int(11) SUM NOT NULL COMMENT "" -) ENGINE=OLAP -AGGREGATE KEY(`k1`, `k2`) -PARTITION BY RANGE(`k1`) -( - PARTITION p1 VALUES LESS THAN ('2019-05-31'), - PARTITION p2 VALUES LESS THAN ('2019-06-30') -) -DISTRIBUTED BY HASH(`k2`) BUCKETS 8 -PROPERTIES ( - "colocate_with" = "group1" -); -``` - -表2: - -``` -CREATE TABLE `tbl2` ( - `k1` datetime NOT NULL COMMENT "", - `k2` int(11) NOT NULL COMMENT "", - `v1` double SUM NOT NULL COMMENT "" -) ENGINE=OLAP -AGGREGATE KEY(`k1`, `k2`) -DISTRIBUTED BY HASH(`k2`) BUCKETS 8 -PROPERTIES ( - "colocate_with" = "group1" -); -``` - -查看查询计划: - -``` -DESC SELECT * FROM tbl1 INNER JOIN tbl2 ON (tbl1.k2 = tbl2.k2); - -+----------------------------------------------------+ -| Explain String | -+----------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS:`tbl1`.`k1` | | -| PARTITION: RANDOM | -| | -| RESULT SINK | -| | -| 2:HASH JOIN | -| | join op: INNER JOIN | -| | hash predicates: | -| | colocate: true | -| | `tbl1`.`k2` = `tbl2`.`k2` | -| | tuple ids: 0 1 | -| | | -| |----1:OlapScanNode | -| | TABLE: tbl2 | -| | PREAGGREGATION: OFF. Reason: null | -| | partitions=0/1 | -| | rollup: null | -| | buckets=0/0 | -| | cardinality=-1 | -| | avgRowSize=0.0 | -| | numNodes=0 | -| | tuple ids: 1 | -| | | -| 0:OlapScanNode | -| TABLE: tbl1 | -| PREAGGREGATION: OFF. Reason: No AggregateInfo | -| partitions=0/2 | -| rollup: null | -| buckets=0/0 | -| cardinality=-1 | -| avgRowSize=0.0 | -| numNodes=0 | -| tuple ids: 0 | -+----------------------------------------------------+ -``` -如果 Colocation Join 生效,则 Hash Join 节点会显示 `colocate: true`。 - -如果没有生效,则查询计划如下: - -``` -+----------------------------------------------------+ -| Explain String | -+----------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS:`tbl1`.`k1` | | -| PARTITION: RANDOM | -| | -| RESULT SINK | -| | -| 2:HASH JOIN | -| | join op: INNER JOIN (BROADCAST) | -| | hash predicates: | -| | colocate: false, reason: group is not stable | -| | `tbl1`.`k2` = `tbl2`.`k2` | -| | tuple ids: 0 1 | -| | | -| |----3:EXCHANGE | -| | tuple ids: 1 | -| | | -| 0:OlapScanNode | -| TABLE: tbl1 | -| PREAGGREGATION: OFF. 
Reason: No AggregateInfo | -| partitions=0/2 | -| rollup: null | -| buckets=0/0 | -| cardinality=-1 | -| avgRowSize=0.0 | -| numNodes=0 | -| tuple ids: 0 | -| | -| PLAN FRAGMENT 1 | -| OUTPUT EXPRS: | -| PARTITION: RANDOM | -| | -| STREAM DATA SINK | -| EXCHANGE ID: 03 | -| UNPARTITIONED | -| | -| 1:OlapScanNode | -| TABLE: tbl2 | -| PREAGGREGATION: OFF. Reason: null | -| partitions=0/1 | -| rollup: null | -| buckets=0/0 | -| cardinality=-1 | -| avgRowSize=0.0 | -| numNodes=0 | -| tuple ids: 1 | -+----------------------------------------------------+ -``` - -HASH JOIN 节点会显示对应原因:`colocate: false, reason: group is not stable`。同时会有一个 EXCHANGE 节点生成。 - - -## 高级操作 - -### FE 配置项 - -* disable\_colocate\_relocate - - 是否关闭 Doris 的自动 Colocation 副本修复。默认为 false,即不关闭。该参数只影响 Colocation 表的副本修复,不影响普通表。 - -* disable\_colocate\_balance - - 是否关闭 Doris 的自动 Colocation 副本均衡。默认为 false,即不关闭。该参数只影响 Colocation 表的副本均衡,不影响普通表。 - -以上参数可以动态修改,设置方式请参阅 `HELP ADMIN SHOW CONFIG;` 和 `HELP ADMIN SET CONFIG;`。 - -* disable\_colocate\_join - - 是否关闭 Colocation Join 功能。在 0.10 及之前的版本,默认为 true,即关闭。在之后的某个版本中将默认为 false,即开启。 - -* use\_new\_tablet\_scheduler - - 在 0.10 及之前的版本中,新的副本调度逻辑与 Colocation Join 功能不兼容,所以在 0.10 及之前版本,如果 `disable_colocate_join = false`,则需设置 `use_new_tablet_scheduler = false`,即关闭新的副本调度器。之后的版本中,`use_new_tablet_scheduler` 将衡为 true。 - -### HTTP Restful API - -Doris 提供了几个和 Colocation Join 有关的 HTTP Restful API,用于查看和修改 Colocation Group。 - -该 API 实现在 FE 端,使用 `fe_host:fe_http_port` 进行访问。需要 ADMIN 权限。 - -1. 查看集群的全部 Colocation 信息 - - ``` - GET /api/colocate - - 返回以 Json 格式表示内部 Colocation 信息。 - - { - "msg": "success", - "code": 0, - "data": { - "infos": [ - ["10003.12002", "10003_group1", "10037, 10043", "1", "1", "int(11)", "true"] - ], - "unstableGroupIds": [], - "allGroupIds": [{ - "dbId": 10003, - "grpId": 12002 - }] - }, - "count": 0 - } - ``` - -2. 将 Group 标记为 Stable 或 Unstable - - * 标记为 Stable - - ``` - POST /api/colocate/group_stable?db_id=10005&group_id=10008 - - 返回:200 - ``` - - * 标记为 Unstable - - ``` - DELETE /api/colocate/group_stable?db_id=10005&group_id=10008 - - 返回:200 - ``` - -3. 设置 Group 的数据分布 - - 该接口可以强制设置某一 Group 的数分布。 - - ``` - POST /api/colocate/bucketseq?db_id=10005&group_id=10008 - - Body: - [[10004,10002],[10003,10002],[10002,10004],[10003,10002],[10002,10004],[10003,10002],[10003,10004],[10003,10004],[10003,10004],[10002,10004]] - - 返回 200 - ``` - 其中 Body 是以嵌套数组表示的 BucketsSequence 以及每个 Bucket 中分片分布所在 BE 的 id。 - - 注意,使用该命令,可能需要将 FE 的配置 `disable_colocate_relocate` 和 `disable_colocate_balance` 设为 true。即关闭系统自动的 Colocation 副本修复和均衡。否则可能在修改后,会被系统自动重置。 diff --git a/docs/zh-CN/administrator-guide/config/be_config.md b/docs/zh-CN/administrator-guide/config/be_config.md deleted file mode 100644 index 5820889df7..0000000000 --- a/docs/zh-CN/administrator-guide/config/be_config.md +++ /dev/null @@ -1,1543 +0,0 @@ ---- -{ - "title": "BE 配置项", - "language": "zh-CN" -} ---- - - - - - -# BE 配置项 - -该文档主要介绍 BE 的相关配置项。 - -BE 的配置文件 `be.conf` 通常存放在 BE 部署路径的 `conf/` 目录下。 而在 0.14 版本中会引入另一个配置文件 `be_custom.conf`。该配置文件用于记录用户在运行时动态配置并持久化的配置项。 - -BE 进程启动后,会先读取 `be.conf` 中的配置项,之后再读取 `be_custom.conf` 中的配置项。`be_custom.conf` 中的配置项会覆盖 `be.conf` 中相同的配置项。 - -## 查看配置项 - -用户可以通过访问 BE 的 Web 页面查看当前配置项: - -`http://be_host:be_webserver_port/varz` - -## 设置配置项 - -BE 的配置项有两种方式进行配置: - -1. 静态配置 - - 在 `conf/be.conf` 文件中添加和设置配置项。`be.conf` 中的配置项会在 BE 进行启动时被读取。没有在 `be.conf` 中的配置项将使用默认值。 - -2. 
动态配置 - - BE 启动后,可以通过以下命令动态设置配置项。 - - ``` - curl -X POST http://{be_ip}:{be_http_port}/api/update_config?{key}={value}' - ``` - - 在 0.13 版本及之前,通过该方式修改的配置项将在 BE 进程重启后失效。在 0.14 及之后版本中,可以通过以下命令持久化修改后的配置。修改后的配置项存储在 `be_custom.conf` 文件中。 - - ``` - curl -X POST http://{be_ip}:{be_http_port}/api/update_config?{key}={value}&persist=true' - ``` - -## 应用举例 - -1. 静态方式修改 `max_compaction_concurrency` - - 通过在 `be.conf` 文件中添加: - - ```max_compaction_concurrency=5``` - - 之后重启 BE 进程以生效该配置。 - -2. 动态方式修改 `streaming_load_max_mb` - - BE 启动后,通过下面命令动态设置配置项 `streaming_load_max_mb`: - - ```curl -X POST http://{be_ip}:{be_http_port}/api/update_config?streaming_load_max_mb=1024``` - - 返回值如下,则说明设置成功。 - - ``` - { - "status": "OK", - "msg": "" - } - ``` - - BE 重启后该配置将失效。如果想持久化修改结果,使用如下命令: - - ``` - curl -X POST http://{be_ip}:{be_http_port}/api/update_config?streaming_load_max_mb=1024\&persist=true - ``` - -## 配置项列表 - -### `alter_tablet_worker_count` - -默认值:3 - -进行schema change的线程数 - -### `base_compaction_check_interval_seconds` - -默认值:60 (s) - -BaseCompaction线程轮询的间隔 - -### `base_compaction_interval_seconds_since_last_operation` - -默认值:86400 - -BaseCompaction触发条件之一:上一次BaseCompaction距今的间隔 - -### `base_compaction_num_cumulative_deltas` - -默认值:5 - -BaseCompaction触发条件之一:Cumulative文件数目要达到的限制,达到这个限制之后会触发BaseCompaction - -### `base_compaction_write_mbytes_per_sec` - -默认值:5(MB) - -BaseCompaction任务每秒写磁盘最大速度 - -### `base_cumulative_delta_ratio` - -默认值:0.3 (30%) - -BaseCompaction触发条件之一:Cumulative文件大小达到Base文件的比例 - -### `base_compaction_trace_threshold` - -* 类型:int32 -* 描述:打印base compaction的trace信息的阈值,单位秒 -* 默认值:10 - -base compaction是一个耗时较长的后台操作,为了跟踪其运行信息,可以调整这个阈值参数来控制trace日志的打印。打印信息如下: - -``` -W0610 11:26:33.804431 56452 storage_engine.cpp:552] Trace: -0610 11:23:03.727535 (+ 0us) storage_engine.cpp:554] start to perform base compaction -0610 11:23:03.728961 (+ 1426us) storage_engine.cpp:560] found best tablet 546859 -0610 11:23:03.728963 (+ 2us) base_compaction.cpp:40] got base compaction lock -0610 11:23:03.729029 (+ 66us) base_compaction.cpp:44] rowsets picked -0610 11:24:51.784439 (+108055410us) compaction.cpp:46] got concurrency lock and start to do compaction -0610 11:24:51.784818 (+ 379us) compaction.cpp:74] prepare finished -0610 11:26:33.359265 (+101574447us) compaction.cpp:87] merge rowsets finished -0610 11:26:33.484481 (+125216us) compaction.cpp:102] output rowset built -0610 11:26:33.484482 (+ 1us) compaction.cpp:106] check correctness finished -0610 11:26:33.513197 (+ 28715us) compaction.cpp:110] modify rowsets finished -0610 11:26:33.513300 (+ 103us) base_compaction.cpp:49] compaction finished -0610 11:26:33.513441 (+ 141us) base_compaction.cpp:56] unused rowsets have been moved to GC queue -Metrics: {"filtered_rows":0,"input_row_num":3346807,"input_rowsets_count":42,"input_rowsets_data_size":1256413170,"input_segments_num":44,"merge_rowsets_latency_us":101574444,"merged_rows":0,"output_row_num":3346807,"output_rowset_data_size":1228439659,"output_segments_num":6} -``` - -### `be_port` - -* 类型:int32 -* 描述:BE 上 thrift server 的端口号,用于接收来自 FE 的请求 -* 默认值:9060 - -### `be_service_threads` -* 类型:int32 -* 描述:BE 上 thrift server service的执行线程数,代表可以用于执行FE请求的线程数。 -* 默认值:64 - -### `brpc_max_body_size` - -这个配置主要用来修改 brpc 的参数 `max_body_size`。 - -有时查询失败,在 BE 日志中会出现 `body_size is too large` 的错误信息。这可能发生在 SQL 模式为 multi distinct + 无 group by + 超过1T 数据量的情况下。这个错误表示 brpc 的包大小超过了配置值。此时可以通过调大该配置避免这个错误。 - -### `brpc_socket_max_unwritten_bytes` - -这个配置主要用来修改 brpc 的参数 `socket_max_unwritten_bytes`。 - -有时查询失败,BE 日志中会出现 `The server is overcrowded` 
的错误信息,表示连接上有过多的未发送数据。当查询需要发送较大的bitmap字段时,可能会遇到该问题,此时可能通过调大该配置避免该错误。 - -### `transfer_data_by_brpc_attachment` - -* 类型: bool -* 描述:该配置用来控制是否将ProtoBuf Request中的RowBatch转移到Controller Attachment后通过brpc发送。ProtoBuf Request的长度超过2G时会报错: Bad request, error_text=[E1003]Fail to compress request,将RowBatch放到Controller Attachment中将更快且避免这个错误。 -* 默认值:false - -### `brpc_num_threads` - -该配置主要用来修改brpc中bthreads的数量. 该配置的默认值被设置为-1, 这意味着bthreads的数量将被设置为机器的cpu核数。 - -用户可以将该配置的值调大来获取更好的QPS性能。更多的信息可以参考`https://github.com/apache/incubator-brpc/blob/master/docs/cn/benchmark.md`。 - -### `brpc_port` - -* 类型:int32 -* 描述:BE 上的 brpc 的端口,用于 BE 之间通讯 -* 默认值:8060 - -### `buffer_pool_clean_pages_limit` - -默认值:20G - -清理可能被缓冲池保存的Page - -### `buffer_pool_limit` - -* 类型:string -* 描述:buffer pool之中最大的可分配内存 -* 默认值:20% - -BE缓存池最大的内存可用量,buffer pool是BE新的内存管理结构,通过buffer page来进行内存管理,并能够实现数据的落盘。并发的所有查询的内存申请都会通过buffer pool来申请。当前buffer pool仅作用在**AggregationNode**与**ExchangeNode**。 - -### `check_auto_compaction_interval_seconds` - -* 类型:int32 -* 描述:当自动执行compaction的功能关闭时,检查自动compaction开关是否被开启的时间间隔。 -* 默认值:5 - -### `check_consistency_worker_count` - -默认值:1 - -计算tablet的校验和(checksum)的工作线程数 - -### `chunk_reserved_bytes_limit` - -默认值:2147483648 - -Chunk Allocator的reserved bytes限制,默认为2GB,增加这个变量可以提高性能,但是会获得更多其他模块无法使用的空闲内存 - -### `clear_transaction_task_worker_count` - -默认值:1 - -用于清理事务的线程数 - -### `clone_worker_count` - -默认值:3 - -用于执行克隆任务的线程数 - -### `cluster_id` - -* 类型:int32 - -* 描述:配置BE的所属于的集群id。 - -* 默认值:-1 - -该值通常由FE通过心跳向BE下发,不需要额外进行配置。当确认某BE属于某一个确定的Drois集群时,可以进行配置,同时需要修改数据目录下的cluster_id文件,使二者相同。 - -### `column_dictionary_key_ratio_threshold` - -默认值:0 - -字符串类型的取值比例,小于这个比例采用字典压缩算法 - -### `column_dictionary_key_size_threshold` - -默认值:0 - -字典压缩列大小,小于这个值采用字典压缩算法 - -### `compaction_tablet_compaction_score_factor` - -* 类型:int32 -* 描述:选择tablet进行compaction时,计算 tablet score 的公式中 compaction score的权重。 -* 默认值:1 - -### `compaction_tablet_scan_frequency_factor` - -* 类型:int32 -* 描述:选择tablet进行compaction时,计算 tablet score 的公式中 tablet scan frequency 的权重。 -* 默认值:0 - -选择一个tablet执行compaction任务时,可以将tablet的scan频率作为一个选择依据,对当前最近一段时间频繁scan的tablet优先执行compaction。 -tablet score可以通过以下公式计算: - -tablet_score = compaction_tablet_scan_frequency_factor * tablet_scan_frequency + compaction_tablet_compaction_score_factor * compaction_score - -### `compaction_task_num_per_disk` - -* 类型:int32 -* 描述:每个磁盘(HDD)可以并发执行的compaction任务数量。 -* 默认值:2 - -### `compaction_task_num_per_fast_disk` - -* 类型:int32 -* 描述:每个高速磁盘(SSD)可以并发执行的compaction任务数量。 -* 默认值:4 - -### `compress_rowbatches` -* 类型:bool - -* 描述:序列化RowBatch时是否使用Snappy压缩算法进行数据压缩 - -* 默认值:true - -### `create_tablet_worker_count` - -默认值:3 - -BE创建tablet的工作线程数 - -### `cumulative_compaction_rounds_for_each_base_compaction_round` - -* 类型:int32 -* 描述:Compaction任务的生产者每次连续生产多少轮cumulative compaction任务后生产一轮base compaction。 -* 默认值:9 - -### `disable_auto_compaction` - -* 类型:bool -* 描述:关闭自动执行compaction任务 -* 默认值:false - -一般需要为关闭状态,当调试或测试环境中想要手动操作compaction任务时,可以对该配置进行开启 - -### `cumulative_compaction_budgeted_bytes` - -默认值:104857600 - -BaseCompaction触发条件之一:Singleton文件大小限制,100MB - -### `cumulative_compaction_check_interval_seconds` - -默认值:10 (s) - -CumulativeCompaction线程轮询的间隔 - -### `cumulative_compaction_skip_window_seconds` - -默认值:30 (s) - -CumulativeCompaction会跳过最近发布的增量,以防止压缩可能被查询的版本(以防查询计划阶段花费一些时间)。改参数是设置跳过的窗口时间大小 - -### `cumulative_compaction_trace_threshold` - -* 类型:int32 -* 描述:打印cumulative compaction的trace信息的阈值,单位秒 -* 默认值:2 - -与base_compaction_trace_threshold类似。 - -### disable_compaction_trace_log - -* 类型: bool -* 描述: 关闭compaction的trace日志 -* 默认值: true - 
-如果设置为true,`cumulative_compaction_trace_threshold` 和 `base_compaction_trace_threshold` 将不起作用。并且trace日志将关闭。 - -### `cumulative_compaction_policy` - -* 类型:string -* 描述:配置 cumulative compaction 阶段的合并策略,目前实现了两种合并策略,num_based和size_based -* 默认值:size_based - -详细说明,ordinary,是最初版本的cumulative compaction合并策略,做一次cumulative compaction之后直接base compaction流程。size_based,通用策略是ordinary策略的优化版本,仅当rowset的磁盘体积在相同数量级时才进行版本合并。合并之后满足条件的rowset进行晋升到base compaction阶段。能够做到在大量小批量导入的情况下:降低base compact的写入放大率,并在读取放大率和空间放大率之间进行权衡,同时减少了文件版本的数据。 - -### `cumulative_size_based_promotion_size_mbytes` - -* 类型:int64 -* 描述:在size_based策略下,cumulative compaction的输出rowset总磁盘大小超过了此配置大小,该rowset将用于base compaction。单位是m字节。 -* 默认值:1024 - -一般情况下,配置在2G以内,为了防止cumulative compaction时间过长,导致版本积压。 - -### `cumulative_size_based_promotion_ratio` - -* 类型:double -* 描述:在size_based策略下,cumulative compaction的输出rowset总磁盘大小超过base版本rowset的配置比例时,该rowset将用于base compaction。 -* 默认值:0.05 - -一般情况下,建议配置不要高于0.1,低于0.02。 - -### `cumulative_size_based_promotion_min_size_mbytes` - -* 类型:int64 -* 描述:在size_based策略下,cumulative compaction的输出rowset总磁盘大小低于此配置大小,该rowset将不进行base compaction,仍然处于cumulative compaction流程中。单位是m字节。 -* 默认值:64 - -一般情况下,配置在512m以内,配置过大会导致base版本早期的大小过小,一直不进行base compaction。 - -### `cumulative_size_based_compaction_lower_size_mbytes` - -* 类型:int64 -* 描述:在size_based策略下,cumulative compaction进行合并时,选出的要进行合并的rowset的总磁盘大小大于此配置时,才按级别策略划分合并。小于这个配置时,直接执行合并。单位是m字节。 -* 默认值:64 - -一般情况下,配置在128m以内,配置过大会导致cumulative compaction写放大较多。 - -### `custom_config_dir` - -配置 `be_custom.conf` 文件的位置。默认为 `conf/` 目录下。 - -在某些部署环境下,`conf/` 目录可能因为系统的版本升级被覆盖掉。这会导致用户在运行是持久化修改的配置项也被覆盖。这时,我们可以将 `be_custom.conf` 存储在另一个指定的目录中,以防止配置文件被覆盖。 - -### `default_num_rows_per_column_file_block` -* 类型:int32 -* 描述:配置单个RowBlock之中包含多少行的数据。 -* 默认值:1024 - -### `default_rowset_type` -* 类型:string -* 描述:标识BE默认选择的存储格式,可配置的参数为:"**ALPHA**", "**BETA**"。主要起以下两个作用 -1. 当建表的storage_format设置为Default时,通过该配置来选取BE的存储格式。 -2. 
进行Compaction时选择BE的存储格式 -* 默认值:BETA - -### `delete_worker_count` - -默认值:3 - -执行数据删除任务的线程数 - -### `disable_mem_pools` - -默认值:false - -是否禁用内存缓存池,默认不禁用 - -### `disable_storage_page_cache` - -* 类型:bool -* 描述:是否进行使用page cache进行index的缓存,该配置仅在BETA存储格式时生效 -* 默认值:false - -### `disk_stat_monitor_interval` - -默认值:5 (s) - -磁盘状态检查时间间隔 - -### `doris_cgroups` - -默认值:空 - -分配给doris的cgroups - -### `doris_max_pushdown_conjuncts_return_rate` - -* 类型:int32 -* 描述:BE在进行HashJoin时,会采取动态分区裁剪的方式将join条件下推到OlapScanner上。当OlapScanner扫描的数据大于32768行时,BE会进行过滤条件检查,如果该过滤条件的过滤率低于该配置,则Doris会停止使用动态分区裁剪的条件进行数据过滤。 -* 默认值:90 - - -### `doris_max_scan_key_num` - -* 类型:int -* 描述:用于限制一个查询请求中,scan node 节点能拆分的最大 scan key 的个数。当一个带有条件的查询请求到达 scan node 节点时,scan node 会尝试将查询条件中 key 列相关的条件拆分成多个 scan key range。之后这些 scan key range 会被分配给多个 scanner 线程进行数据扫描。较大的数值通常意味着可以使用更多的 scanner 线程来提升扫描操作的并行度。但在高并发场景下,过多的线程可能会带来更大的调度开销和系统负载,反而会降低查询响应速度。一个经验数值为 50。该配置可以单独进行会话级别的配置,具体可参阅 [变量](../variables.md) 中 `max_scan_key_num` 的说明。 -* 默认值:1024 - -当在高并发场景下发下并发度无法提升时,可以尝试降低该数值并观察影响。 - -### `doris_scan_range_row_count` - -* 类型:int32 -* 描述:BE在进行数据扫描时,会将同一个扫描范围拆分为多个ScanRange。该参数代表了每个ScanRange代表扫描数据范围。通过该参数可以限制单个OlapScanner占用io线程的时间。 -* 默认值:524288 - -### `doris_scanner_queue_size` - -* 类型:int32 -* 描述:TransferThread与OlapScanner之间RowBatch的缓存队列的长度。Doris进行数据扫描时是异步进行的,OlapScanner扫描上来的Rowbatch会放入缓存队列之中,等待上层TransferThread取走。 -* 默认值:1024 - -### `doris_scanner_row_num` - -默认值:16384 - -每个扫描线程单次执行最多返回的数据行数 - -### `doris_scanner_thread_pool_queue_size` - -* 类型:int32 -* 描述:Scanner线程池的队列长度。在Doris的扫描任务之中,每一个Scanner会作为一个线程task提交到线程池之中等待被调度,而提交的任务数目超过线程池队列的长度之后,后续提交的任务将阻塞直到队列之中有新的空缺。 -* 默认值:102400 - -### `doris_scanner_thread_pool_thread_num` - -* 类型:int32 -* 描述:Scanner线程池线程数目。在Doris的扫描任务之中,每一个Scanner会作为一个线程task提交到线程池之中等待被调度,该参数决定了Scanner线程池的大小。 -* 默认值:48 - -### `download_low_speed_limit_kbps` - -默认值:50 (KB/s) - -下载最低限速 - -### `download_low_speed_time` - -默认值:300 (s) - -下载时间限制,默认300秒 - -### `download_worker_count` - -默认值:1 - -下载线程数,默认1个 - -### `drop_tablet_worker_count` - -默认值:3 - -删除tablet的线程数 - -### `enable_metric_calculator` - -默认值:true - -如果设置为 true,metric calculator 将运行,收集BE相关指标信息,如果设置成false将不运行 - -### `enable_partitioned_aggregation` - -* 类型:bool -* 描述:BE节点是否通过PartitionAggregateNode来实现聚合操作,如果false的话将会执行AggregateNode完成聚合。非特殊需求场景不建议设置为false。 -* 默认值:true - -### `enable_prefetch` - -* 类型:bool -* 描述:当使用PartitionedHashTable进行聚合和join计算时,是否进行HashBuket的预取,推荐设置为true。 -* 默认值:true - -### `enable_quadratic_probing` - -* 类型:bool -* 描述:当使用PartitionedHashTable时发生Hash冲突时,是否采用平方探测法来解决Hash冲突。该值为false的话,则选用线性探测发来解决Hash冲突。关于平方探测法可参考:[quadratic_probing](https://en.wikipedia.org/wiki/Quadratic_probing) -* 默认值:true - -### `enable_system_metrics` - -默认值:true - -用户控制打开和关闭系统指标 - -### `enable_token_check` - -默认值:true - -用于向前兼容,稍后将被删除 - -### `es_http_timeout_ms` - -默认值:5000 (ms) - -通过http连接ES的超时时间,默认是5秒 - -### `es_scroll_keepalive` - -默认值:5m - -es scroll Keeplive保持时间,默认5分钟 - -### `etl_thread_pool_queue_size` - -默认值:256 - -ETL线程池的大小 - -### `etl_thread_pool_size` - -### `exchg_node_buffer_size_bytes` - -* 类型:int32 -* 描述:ExchangeNode节点Buffer队列的大小,单位为byte。来自Sender端发送的数据量大于ExchangeNode的Buffer大小之后,后续发送的数据将阻塞直到Buffer腾出可写入的空间。 -* 默认值:10485760 - -### `file_descriptor_cache_capacity` - -默认值:32768 - -文件句柄缓存的容量,默认缓存32768个文件句柄 - -### `cache_clean_interval` - -默认值:1800 (s) - -文件句柄缓存清理的间隔,用于清理长期不用的文件句柄。 -同时也是Segment Cache的清理间隔时间。 - -### `flush_thread_num_per_store` - -默认值:2 - -每个store用于刷新内存表的线程数 - -### `force_recovery` - -### `fragment_pool_queue_size` - -默认值:2048 - -单节点上能够处理的查询请求上限 - -### `fragment_pool_thread_num_min` 
- -默认值:64 - -### `fragment_pool_thread_num_max` - -默认值:256 - -查询线程数,默认最小启动64个线程,后续查询请求动态创建线程,最大创建256个线程 - -### `heartbeat_service_port` - -* 类型:int32 -* 描述:BE 上心跳服务端口(thrift),用于接收来自 FE 的心跳 -* 默认值:9050 - -### `heartbeat_service_thread_count` - -* 类型:int32 -* 描述:执行BE上心跳服务的线程数,默认为1,不建议修改 -* 默认值:1 - -### `ignore_broken_disk` - -​ 当BE启动时,会检查``storage_root_path`` 配置下的所有路径。 - - - `ignore_broken_disk=true` - - 如果路径不存在或路径下无法进行读写文件(坏盘),将忽略此路径,如果有其他可用路径则不中断启动。 - - - `ignore_broken_disk=false` - - 如果路径不存在或路径下无法进行读写文件(坏盘),将中断启动失败退出。 - -​ 默认为false - -### `ignore_load_tablet_failure` - -* 类型:bool -* 描述:用来决定在有tablet 加载失败的情况下是否忽略错误,继续启动be -* 默认值:false - -BE启动时,会对每个数据目录单独启动一个线程进行 tablet header 元信息的加载。默认配置下,如果某个数据目录有 tablet 加载失败,则启动进程会终止。同时会在 `be.INFO` 日志中看到如下错误信息: - -``` -load tablets from header failed, failed tablets size: xxx, path=xxx -``` - -表示该数据目录共有多少 tablet 加载失败。同时,日志中也会有加载失败的 tablet 的具体信息。此时需要人工介入来对错误原因进行排查。排查后,通常有两种方式进行恢复: - -1. tablet 信息不可修复,在确保其他副本正常的情况下,可以通过 `meta_tool` 工具将错误的tablet删除。 -2. 将 `ignore_load_tablet_failure` 设置为 true,则 BE 会忽略这些错误的 tablet,正常启动。 - -### `ignore_rowset_stale_unconsistent_delete` - -* 类型:bool -* 描述:用来决定当删除过期的合并过的rowset后无法构成一致的版本路径时,是否仍要删除。 -* 默认值:false - -合并的过期 rowset 版本路径会在半个小时后进行删除。在异常下,删除这些版本会出现构造不出查询一致路径的问题,当配置为false时,程序检查比较严格,程序会直接报错退出。 -当配置为true时,程序会正常运行,忽略这个错误。一般情况下,忽略这个错误不会对查询造成影响,仅会在fe下发了合并过的版本时出现-230错误。 - -### `inc_rowset_expired_sec` - -默认值:1800 (s) - -导入激活的数据,存储引擎保留的时间,用于增量克隆 - -### `index_stream_cache_capacity` - -默认值:10737418240 - -BloomFilter/Min/Max等统计信息缓存的容量 - -### `kafka_broker_version_fallback` - -默认值:0.10.0 - -如果依赖的 kafka 版本低于routine load依赖的 kafka 客户端版本, 将使用回退版本 kafka_broker_version_fallback 设置的值,有效值为:0.9.0、0.8.2、0.8.1、0.8.0。 - -### `load_data_reserve_hours` - -默认值:4 (小时) - -用于mini load。mini load数据文件将在此时间后被删除 - -### `load_error_log_reserve_hours` - -默认值:48(小时) - -load错误日志将在此时间后删除 - -### `load_process_max_memory_limit_bytes` - -默认值:107374182400 - -单节点上所有的导入线程占据的内存上限,默认值:100G - -将这些默认值设置得很大,因为我们不想在用户升级 Doris 时影响负载性能。 如有必要,用户应正确设置这些配置。 - -### `load_process_max_memory_limit_percent` - -默认值:80 - -单节点上所有的导入线程占据的内存上限比例,默认80% - -将这些默认值设置得很大,因为我们不想在用户升级 Doris 时影响负载性能。 如有必要,用户应正确设置这些配置。 - -### `log_buffer_level` - -默认值:空 - -日志刷盘的策略,默认保持在内存中 - -### `madvise_huge_pages` - -默认值:false - -是否使用linux内存大页,默认不启用 - -### `make_snapshot_worker_count` - -默认值:5 - -制作快照的线程数 - -### `max_client_cache_size_per_host` - -默认值:10 - -每个主机的最大客户端缓存数,BE 中有多种客户端缓存,但目前我们使用相同的缓存大小配置。 如有必要,使用不同的配置来设置不同的客户端缓存。 - -### `max_compaction_threads` - -* 类型:int32 -* 描述:Compaction线程池中线程数量的最大值。 -* 默认值:10 - -### `max_consumer_num_per_group` - -默认值:3 - -一个数据消费者组中的最大消费者数量,用于routine load - -### `min_cumulative_compaction_num_singleton_deltas` - -默认值:5 - -cumulative compaction策略:最小增量文件的数量 - -### `max_cumulative_compaction_num_singleton_deltas` - -默认值:1000 - -cumulative compaction策略:最大增量文件的数量 - -### `max_download_speed_kbps` - -默认值:50000 (kb/s) - -最大下载速度限制 - -### `max_free_io_buffers` - -默认值:128 - -对于每个 io 缓冲区大小,IoMgr 将保留的最大缓冲区数从 1024B 到 8MB 的缓冲区,最多约为 2GB 的缓冲区。 - -### `max_garbage_sweep_interval` - -默认值:3600 - -磁盘进行垃圾清理的最大间隔,默认一个小时 - -### `max_memory_sink_batch_count` - -默认值:20 - -最大外部扫描缓存批次计数,表示缓存max_memory_cache_batch_count * batch_size row,默认为20,batch_size的默认值为1024,表示将缓存20 * 1024行 - -### `max_percentage_of_error_disk` - -* 类型:int32 -* 描述:存储引擎允许存在损坏硬盘的百分比,损坏硬盘超过改比例后,BE将会自动退出。 -* 默认值:0 - -### `max_pushdown_conditions_per_column` - -* 类型:int -* 描述:用于限制一个查询请求中,针对单个列,能够下推到存储引擎的最大条件数量。在查询计划执行的过程中,一些列上的过滤条件可以下推到存储引擎,这样可以利用存储引擎中的索引信息进行数据过滤,减少查询需要扫描的数据量。比如等值条件、IN 谓词中的条件等。这个参数在绝大多数情况下仅影响包含 IN 谓词的查询。如 `WHERE colA 
IN (1,2,3,4,...)`。较大的数值意味值 IN 谓词中更多的条件可以推送给存储引擎,但过多的条件可能会导致随机读的增加,某些情况下可能会降低查询效率。该配置可以单独进行会话级别的配置,具体可参阅 [变量](../variables.md) 中 `max_pushdown_conditions_per_column ` 的说明。 -* 默认值:1024 - -* 示例 - - 表结构为 `id INT, col2 INT, col3 varchar(32), ...`。 - - 查询请求为 `... WHERE id IN (v1, v2, v3, ...)` - - 如果 IN 谓词中的条件数量超过了该配置,则可以尝试增加该配置值,观察查询响应是否有所改善。 - -### `max_runnings_transactions_per_txn_map` - -默认值:100 - -txn 管理器中每个 txn_partition_map 的最大 txns 数,这是一种自我保护,以避免在管理器中保存过多的 txns - -### `max_send_batch_parallelism_per_job` - -* 类型:int -* 描述:OlapTableSink 发送批处理数据的最大并行度,用户为 `send_batch_parallelism` 设置的值不允许超过 `max_send_batch_parallelism_per_job` ,如果超过, `send_batch_parallelism` 将被设置为 `max_send_batch_parallelism_per_job` 的值。 -* 默认值:5 - -### `max_tablet_num_per_shard` - -默认:1024 - -每个shard的tablet数目,用于划分tablet,防止单个目录下tablet子目录过多 - -### `max_tablet_version_num` - -* 类型:int -* 描述:限制单个 tablet 最大 version 的数量。用于防止导入过于频繁,或 compaction 不及时导致的大量 version 堆积问题。当超过限制后,导入任务将被拒绝。 -* 默认值:500 - -### `mem_limit` - -* 类型:string -* 描述:限制BE进程使用服务器最大内存百分比。用于防止BE内存挤占太多的机器内存,该参数必须大于0,当百分大于100%之后,该值会默认为100%。 -* 默认值:80% - -### `memory_limitation_per_thread_for_schema_change` - -默认值:2 (GB) - -单个schema change任务允许占用的最大内存 - -### `memory_maintenance_sleep_time_s` - -默认值:10 - -内存维护迭代之间的休眠时间(以秒为单位) - -### `memory_max_alignment` - -默认值:16 - -最大校对内存 - -### `read_size` - -默认值:8388608 - -读取大小是发送到 os 的读取大小。 在延迟和整个过程之间进行权衡,试图让磁盘保持忙碌但不引入寻道。 对于 8 MB 读取,随机 io 和顺序 io 的性能相似 - -### `min_buffer_size` - -默认值:1024 - -最小读取缓冲区大小(以字节为单位) - -### `min_compaction_failure_interval_sec` - -* 类型:int32 -* 描述:在 cumulative compaction 过程中,当选中的 tablet 没能成功的进行版本合并,则会等待一段时间后才会再次有可能被选中。等待的这段时间就是这个配置的值。 -* 默认值:5 -* 单位:秒 - -### `min_compaction_threads` - -* 类型:int32 -* 描述:Compaction线程池中线程数量的最小值。 -* 默认值:10 - -### `min_file_descriptor_number` - -默认值:60000 - -BE进程的文件句柄limit要求的下限 - -### `min_garbage_sweep_interval` - -默认值:180 - -磁盘进行垃圾清理的最小间隔,时间秒 - -### `mmap_buffers` - -默认值:false - -是否使用mmap分配内存,默认不使用 - -### `num_cores` - -* 类型:int32 -* 描述:BE可以使用CPU的核数。当该值为0时,BE将从/proc/cpuinfo之中获取本机的CPU核数。 -* 默认值:0 - -### `num_disks` - -默认值:0 - -控制机器上的磁盘数量。 如果为 0,则来自系统设置。 - -### `num_threads_per_core` - -默认值:3 - -控制每个内核运行工作的线程数。 通常选择 2 倍或 3 倍的内核数量。 这使核心保持忙碌而不会导致过度抖动 - -### `num_threads_per_disk` - -默认值:0 - -每个磁盘的最大线程数也是每个磁盘的最大队列深度 - -### `number_tablet_writer_threads` - -默认值:16 - -tablet写线程数 - -### `path_gc_check` - -默认值:true - -是否启用回收扫描数据线程检查,默认启用 - -### `path_gc_check_interval_second` - -默认值:86400 - -回收扫描数据线程检查时间间隔,单位秒 - -### `path_gc_check_step` - -默认值:1000 - -### `path_gc_check_step_interval_ms` - -默认值:10 (ms) - -### `path_scan_interval_second` - -默认值:86400 - -### `pending_data_expire_time_sec` - -默认值:1800 - -存储引擎保留的未生效数据的最大时长,默认单位:秒 - -### `periodic_counter_update_period_ms` - -默认值:500 - -更新速率计数器和采样计数器的周期,默认单位:毫秒 - -### `plugin_path` - -默认值:${DORIS_HOME}/plugin - -插件路径 - -### `port` - -* 类型:int32 -* 描述:BE单测时使用的端口,在实际环境之中无意义,可忽略。 -* 默认值:20001 - -### `pprof_profile_dir` - -默认值:${DORIS_HOME}/log - -pprof profile保存目录 - -### `priority_networks` - -默认值:空 - -为那些有很多 ip 的服务器声明一个选择策略。 请注意,最多应该有一个 ip 与此列表匹配。 这是一个以分号分隔格式的列表,用 CIDR 表示法,例如 10.10.10.0/24 , 如果没有匹配这条规则的ip,会随机选择一个。 - -### `priority_queue_remaining_tasks_increased_frequency` - -默认值:512 - - the increased frequency of priority for remaining tasks in BlockingPriorityQueue - -### `publish_version_worker_count` - -默认值:8 - -生效版本的线程数 - -### `pull_load_task_dir` - -默认值:${DORIS_HOME}/var/pull_load - -拉取laod任务的目录 - -### `push_worker_count_high_priority` - -默认值:3 - -导入线程数,用于处理HIGH优先级任务 - -### `push_worker_count_normal_priority` - -默认值:3 - 
-导入线程数,用于处理NORMAL优先级任务 - -### `push_write_mbytes_per_sec` - -+ 类型:int32 -+ 描述:导入数据速度控制,默认最快每秒10MB。适用于所有的导入方式。 -+ 单位:MB -+ 默认值:10 - -### `query_scratch_dirs` - -* 类型:string -* 描述:BE进行数据落盘时选取的目录来存放临时数据,与存储路径配置类似,多目录之间用;分隔。 -* 默认值:${DORIS_HOME} - -### `release_snapshot_worker_count` - -默认值:5 - -释放快照的线程数 - -### `report_disk_state_interval_seconds` - -默认值:60 - -代理向 FE 报告磁盘状态的间隔时间(秒) - -### `report_tablet_interval_seconds` - -默认值:60 - -代理向 FE 报告 olap 表的间隔时间(秒) - -### `report_task_interval_seconds` - -默认值:10 - -代理向 FE 报告任务签名的间隔时间(秒) - -### `result_buffer_cancelled_interval_time` - -默认值:300 - -结果缓冲区取消时间(单位:秒) - -### `routine_load_thread_pool_size` - -默认值:10 - -routine load任务的线程池大小。 这应该大于 FE 配置 'max_concurrent_task_num_per_be'(默认 5) - -### `row_nums_check` - -默认值:true - -检查 BE/CE 和schema更改的行号。 true 是打开的,false 是关闭的。 - -### `row_step_for_compaction_merge_log` - -* 类型:int64 -* 描述:Compaction执行过程中,每次合并row_step_for_compaction_merge_log行数据会打印一条LOG。如果该参数被设置为0,表示merge过程中不需要打印LOG。 -* 默认值: 0 -* 可动态修改:是 - -### `scan_context_gc_interval_min` - -默认值:5 - -此配置用于上下文gc线程调度周期 , 注意:单位为分钟,默认为 5 分钟 - -### `send_batch_thread_pool_thread_num` - -* 类型:int32 -* 描述:SendBatch线程池线程数目。在NodeChannel的发送数据任务之中,每一个NodeChannel的SendBatch操作会作为一个线程task提交到线程池之中等待被调度,该参数决定了SendBatch线程池的大小。 -* 默认值:256 - -### `send_batch_thread_pool_queue_size` - -* 类型:int32 -* 描述:SendBatch线程池的队列长度。在NodeChannel的发送数据任务之中,每一个NodeChannel的SendBatch操作会作为一个线程task提交到线程池之中等待被调度,而提交的任务数目超过线程池队列的长度之后,后续提交的任务将阻塞直到队列之中有新的空缺。 -* 默认值:102400 - -### `serialize_batch` - -默认值:false - -BE之间rpc通信是否序列化RowBatch,用于查询层之间的数据传输 - -### `sleep_one_second` -+ 类型:int32 -+ 描述:全局变量,用于BE线程休眠1秒,不应该被修改 -+ 默认值:1 - -### `small_file_dir` - -默认值:${DORIS_HOME}/lib/small_file/ - -用于保存 SmallFileMgr 下载的文件的目录 - -### `snapshot_expire_time_sec` - -默认值:172800 - -快照文件清理的间隔,默认值:48小时 - -### `status_report_interval` - -默认值:5 - -配置文件报告之间的间隔;单位:秒 - -### `storage_flood_stage_left_capacity_bytes` - -默认值:1073741824 - -数据目录应该剩下的最小存储空间,默认1G - -### `storage_flood_stage_usage_percent` - -默认值:95 (95%) - -storage_flood_stage_usage_percent和storage_flood_stage_left_capacity_bytes两个配置限制了数据目录的磁盘容量的最大使用。 如果这两个阈值都达到,则无法将更多数据写入该数据目录。 数据目录的最大已用容量百分比 - -### `storage_medium_migrate_count` - -默认值:1 - -要克隆的线程数 - -### `storage_page_cache_limit` - -默认值:20% - -缓存存储页大小 - -### `index_page_cache_percentage` -* 类型:int32 -* 描述:索引页缓存占总页面缓存的百分比,取值为[0, 100]。 -* 默认值:10 - -### `storage_root_path` - -* 类型:string - -* 描述:BE数据存储的目录,多目录之间用英文状态的分号`;`分隔。可以通过路径区别存储目录的介质,HDD或SSD。可以添加容量限制在每个路径的末尾,通过英文状态逗号`,`隔开。 - - 示例1如下: - - **注意:如果是SSD磁盘要在目录后面加上`.SSD`,HDD磁盘在目录后面加`.HDD`** - - `storage_root_path=/home/disk1/doris.HDD,50;/home/disk2/doris.SSD,10;/home/disk2/doris` - - * /home/disk1/doris.HDD,50,表示存储限制为50GB,HDD; - * /home/disk2/doris.SSD,10,存储限制为10GB,SSD; - * /home/disk2/doris,存储限制为磁盘最大容量,默认为HDD - - 示例2如下: - - **注意:不论HHD磁盘目录还是SSD磁盘目录,文件夹目录名称都无需添加后缀,storage_root_path参数里指定medium即可** - - `storage_root_path=/home/disk1/doris,medium:hdd,capacity:50;/home/disk2/doris,medium:ssd,capacity:50` - - **说明** - - - /home/disk1/doris,medium:hdd,capacity:10,表示存储限制为10GB, HHD; - - /home/disk2/doris,medium:ssd,capacity:50,表示存储限制为50GB, SSD; - - -* 默认值:${DORIS_HOME} - -### `storage_strict_check_incompatible_old_format` -* 类型:bool -* 描述:用来检查不兼容的旧版本格式时是否使用严格的验证方式 -* 默认值: true -* 可动态修改:否 - -配置用来检查不兼容的旧版本格式时是否使用严格的验证方式,当含有旧版本的 hdr 格式时,使用严谨的方式时,程序会 -打出 fatal log 并且退出运行;否则,程序仅打印 warn log. 
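下面给出一个精简的 `be.conf` 片段,示意上文提到的部分存储相关参数在配置文件中的写法(其中的路径、容量与数值仅为示例,请按实际环境调整;一般需重启 BE 进程后生效):

```
# 存储目录:混合使用 SSD 与 HDD,capacity 单位为 GB(路径与容量仅为示例)
storage_root_path=/home/disk1/doris,medium:ssd,capacity:50;/home/disk2/doris,medium:hdd,capacity:200

# Page Cache 的内存上限(示例值,与默认值相同)
storage_page_cache_limit=20%

# 索引页缓存占总 Page Cache 的百分比,取值 [0, 100](示例值)
index_page_cache_percentage=10
```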
- -### `streaming_load_max_mb` - -* 类型:int64 -* 描述:用于限制数据格式为 csv 的一次 Stream load 导入中,允许的最大数据量。单位 MB。 -* 默认值: 10240 -* 可动态修改:是 - -Stream Load 一般适用于导入几个GB以内的数据,不适合导入过大的数据。 - -### `streaming_load_json_max_mb` - -* 类型:int64 -* 描述:用于限制数据格式为 json 的一次 Stream load 导入中,允许的最大数据量。单位 MB。 -* 默认值: 100 -* 可动态修改:是 - -一些数据格式,如 JSON,无法进行拆分处理,必须读取全部数据到内存后才能开始解析,因此,这个值用于限制此类格式数据单次导入最大数据量。 - -### `streaming_load_rpc_max_alive_time_sec` - -默认值:1200 - -TabletsChannel 的存活时间。如果此时通道没有收到任何数据, 通道将被删除。 - -### `sync_tablet_meta` - -默认值:false - -存储引擎是否开sync保留到磁盘上 - -### `sys_log_dir` - -* 类型:string -* 描述:BE日志数据的存储目录 -* 默认值:${DORIS_HOME}/log - -### `sys_log_level` - -默认值:INFO - -日志级别,INFO < WARNING < ERROR < FATAL - -### `sys_log_roll_mode` - -默认值:SIZE-MB-1024 - -日志拆分的大小,每1G拆分一个日志文件 - -### `sys_log_roll_num` - -默认值:10 - -日志文件保留的数目 - -### `sys_log_verbose_level` - -默认值:10 - -日志显示的级别,用于控制代码中VLOG开头的日志输出 - -### `sys_log_verbose_modules` - -默认值:空 - -日志打印的模块,写olap就只打印olap模块下的日志 - -### `tablet_map_shard_size` - -默认值:1 - -tablet_map_lock 分片大小,值为 2^n, n=0,1,2,3,4 ,这是为了更好地管理tablet - -### `tablet_meta_checkpoint_min_interval_secs` - -默认值:600 (秒) - -TabletMeta Checkpoint线程轮询的时间间隔 - -### `tablet_meta_checkpoint_min_new_rowsets_num` - -默认值:10 - -TabletMeta Checkpoint的最小Rowset数目 - -### `tablet_scan_frequency_time_node_interval_second` - -* 类型:int64 -* 描述:用来表示记录 metric 'query_scan_count' 的时间间隔。为了计算当前一段时间的tablet的scan频率,需要每隔一段时间记录一次 metric 'query_scan_count'。 -* 默认值:300 - -### `tablet_stat_cache_update_interval_second` - -默认值:300 - -tablet状态缓存的更新间隔,单位:秒 - -### `tablet_rowset_stale_sweep_time_sec` - -* 类型:int64 -* 描述:用来表示清理合并版本的过期时间,当当前时间 now() 减去一个合并的版本路径中rowset最近创建创建时间大于tablet_rowset_stale_sweep_time_sec时,对当前路径进行清理,删除这些合并过的rowset, 单位为s。 -* 默认值:1800 - -当写入过于频繁,磁盘空间不足时,可以配置较少这个时间。不过这个时间过短小于5分钟时,可能会引发fe查询不到已经合并过的版本,引发查询-230错误。 - -### `tablet_writer_open_rpc_timeout_sec` - -默认值:60 - -在远程BE 中打开tablet writer的 rpc 超时。 操作时间短,可设置短超时时间 - -### `tablet_writer_ignore_eovercrowded` - -* 类型:bool -* 描述:写入时可忽略brpc的'[E1011]The server is overcrowded'错误。 -* 默认值:false - -当遇到'[E1011]The server is overcrowded'的错误时,可以调整配置项`brpc_socket_max_unwritten_bytes`,但这个配置项不能动态调整。所以可通过设置此项为`true`来临时避免写失败。注意,此配置项只影响写流程,其他的rpc请求依旧会检查是否overcrowded。 - -### `tc_free_memory_rate` - -默认值:20 (%) - -可用内存,取值范围:[0-100] - -### `tc_max_total_thread_cache_bytes` - -* 类型:int64 -* 描述:用来限制 tcmalloc 中总的线程缓存大小。这个限制不是硬限,因此实际线程缓存使用可能超过这个限制。具体可参阅 [TCMALLOC\_MAX\_TOTAL\_THREAD\_CACHE\_BYTES](https://gperftools.github.io/gperftools/tcmalloc.html) -* 默认值: 1073741824 - -如果发现系统在高压力场景下,通过 BE 线程堆栈发现大量线程处于 tcmalloc 的锁竞争阶段,如大量的 `SpinLock` 相关堆栈,则可以尝试增大该参数来提升系统性能。[参考](https://github.com/gperftools/gperftools/issues/1111) - -### `tc_use_memory_min` - -默认值:10737418240 - -TCmalloc 的最小内存,当使用的内存小于这个时,不返回给操作系统 - -### `thrift_client_retry_interval_ms` - -* 类型:int64 -* 描述:用来为be的thrift客户端设置重试间隔, 避免fe的thrift server发生雪崩问题,单位为ms。 -* 默认值:1000 - -### `thrift_connect_timeout_seconds` - -默认值:3 - -默认thrift客户端连接超时时间(单位:秒) - -### `thrift_rpc_timeout_ms` - -默认值:5000 - -thrift默认超时时间,默认:5秒 - -### `thrift_server_type_of_fe` - -该配置表示FE的Thrift服务使用的服务模型, 类型为string, 大小写不敏感,该参数需要和fe的thrift_server_type参数的设置保持一致。目前该参数的取值有两个,`THREADED`和`THREAD_POOL`。 - -若该参数为`THREADED`, 该模型为非阻塞式I/O模型, - -若该参数为`THREAD_POOL`, 该模型为阻塞式I/O模型。 - -### `total_permits_for_compaction_score` - -* 类型:int64 -* 描述:被所有的compaction任务所能持有的 "permits" 上限,用来限制compaction占用的内存。 -* 默认值:10000 -* 可动态修改:是 - -### `trash_file_expire_time_sec` - -默认值:259200 - -回收站清理的间隔,72个小时,当磁盘空间不足时,trash下的文件保存期可不遵守这个参数 - -### `txn_commit_rpc_timeout_ms` - -默认值:10000 - -txn 提交 rpc 超时,默认10秒 - -### 
`txn_map_shard_size` - -默认值:128 - -txn_map_lock 分片大小,取值为2^n,n=0,1,2,3,4。这是一项增强功能,可提高管理 txn 的性能 - -### `txn_shard_size` - -默认值:1024 - -txn_lock 分片大小,取值为2^n,n=0,1,2,3,4, 这是一项增强功能,可提高提交和发布 txn 的性能 - -### `unused_rowset_monitor_interval` - -默认值:30 - -清理过期Rowset的时间间隔,单位:秒 - -### `upload_worker_count` - -默认值:1 - -上传文件最大线程数 - -### `use_mmap_allocate_chunk` - -默认值:false - -是否使用 mmap 分配块。 如果启用此功能,最好增加 vm.max_map_count 的值,其默认值为 65530。您可以通过“sysctl -w vm.max_map_count=262144”或“echo 262144 > /proc/sys/vm/”以 root 身份进行操作 max_map_count" ,当这个设置为true时,你必须将chunk_reserved_bytes_limit设置为一个相对较大的数字,否则性能非常非常糟糕。 - -### `user_function_dir` - -默认值:${DORIS_HOME}/lib/udf - -udf函数目录 - -### `webserver_num_workers` - -默认值:48 - -webserver默认工作线程数 - -### `webserver_port` -* 类型:int32 -* 描述:BE 上的 http server 的服务端口 -* 默认值:8040 - -### `write_buffer_size` - -默认值:104857600 - -刷写前缓冲区的大小 - -### `zone_map_row_num_threshold` - -* 类型: int32 -* 描述: 如果一个page中的行数小于这个值就不会创建zonemap,用来减少数据膨胀 -* 默认值: 20 - -### `aws_log_level` - -* 类型: int32 -* 描述: AWS SDK 的日志级别 - ``` - Off = 0, - Fatal = 1, - Error = 2, - Warn = 3, - Info = 4, - Debug = 5, - Trace = 6 - ``` -* 默认值: 3 - -### `track_new_delete` - -* 类型:bool -* 描述:是否Hook TCmalloc new/delete,目前在Hook中统计thread local MemTracker。 -* 默认值:true - -### `mem_tracker_level` - -* 类型: int16 -* 描述: MemTracker在Web页面上展示的级别,等于或低于这个级别的MemTracker会在Web页面上展示 - ``` - OVERVIEW = 0 - TASK = 1 - INSTANCE = 2 - VERBOSE = 3 - ``` -* 默认值: 0 - -### `mem_tracker_consume_min_size_bytes` - -* 类型: int32 -* 描述: TCMalloc Hook consume/release MemTracker时的最小长度,小于该值的consume size会持续累加,避免频繁调用MemTracker的consume/release,减小该值会增加consume/release的频率,增大该值会导致MemTracker统计不准,理论上一个MemTracker的统计值与真实值相差 = (mem_tracker_consume_min_size_bytes * 这个MemTracker所在的BE线程数)。 -* 默认值: 1048576 - -### `memory_leak_detection` - -* 类型: bool -* 描述: 是否启动内存泄漏检测,当 MemTracker 为负值时认为发生了内存泄漏,但实际 MemTracker 记录不准确时也会导致负值,所以这个功能处于实验阶段。 -* 默认值: false - -### `max_segment_num_per_rowset` - -* 类型: int32 -* 描述: 用于限制导入时,新产生的rowset中的segment数量。如果超过阈值,导入会失败并报错 -238。过多的 segment 会导致compaction占用大量内存引发 OOM 错误。 -* 默认值: 200 - -### `remote_storage_read_buffer_mb` - -* 类型: int32 -* 描述: 读取hdfs或者对象存储上的文件时,使用的缓存大小。 -* 默认值: 16MB - -增大这个值,可以减少远端数据读取的调用次数,但会增加内存开销。 - -### `external_table_connect_timeout_sec` - -* 类型: int32 -* 描述: 和外部表建立连接的超时时间。 -* 默认值: 5秒 - -### `segment_cache_capacity` - -* 类型: int32 -* 描述: Segment Cache 缓存的 Segment 最大数量 -* 默认值: 1000000 - -默认值目前只是一个经验值,可能需要根据实际场景修改。增大该值可以缓存更多的segment从而避免一些IO。减少该值则会降低内存使用。 - -### `auto_refresh_brpc_channel` - -* 类型: bool -* 描述: 获取brpc连接时,通过hand_shake rpc 判断连接的可用性,如果不可用则重新建立连接 -* 默认值: false - -### `high_priority_flush_thread_num_per_store` - -* 类型:int32 -* 描述:每个存储路径所分配的用于高优导入任务的 flush 线程数量。 -* 默认值:1 - -### `routine_load_consumer_pool_size` - -* 类型:int32 -* 描述:routine load 所使用的 data consumer 的缓存数量。 -* 默认值:10 - -### `load_task_high_priority_threshold_second` - -* 类型:int32 -* 描述:当一个导入任务的超时时间小于这个阈值是,Doris 将认为他是一个高优任务。高优任务会使用独立的 flush 线程池。 -* 默认:120 - -### `min_load_rpc_timeout_ms` - -* 类型:int32 -* 描述:load 作业中各个rpc 的最小超时时间。 -* 默认:20 - -### `doris_scan_range_max_mb` -* 类型: int32 -* 描述: 每个OlapScanner 读取的最大数据量 -* 默认值: 1024 - -### `string_type_length_soft_limit_bytes` -* 类型: int32 -* 描述: String 类型最大长度的软限,单位是字节 -* 默认值: 1048576 diff --git a/docs/zh-CN/administrator-guide/config/fe_config.md b/docs/zh-CN/administrator-guide/config/fe_config.md deleted file mode 100644 index 8b14b7aa18..0000000000 --- a/docs/zh-CN/administrator-guide/config/fe_config.md +++ /dev/null @@ -1,2234 +0,0 @@ ---- -{ - "title": "FE 配置项", - "language": "zh-CN" -} - ---- - - - -# 
Doris FE配置参数 - -该文档主要介绍 FE 的相关配置项。 - -FE 的配置文件 `fe.conf` 通常存放在 FE 部署路径的 `conf/` 目录下。 而在 0.14 版本中会引入另一个配置文件 `fe_custom.conf`。该配置文件用于记录用户在运行是动态配置并持久化的配置项。 - -FE 进程启动后,会先读取 `fe.conf` 中的配置项,之后再读取 `fe_custom.conf` 中的配置项。`fe_custom.conf` 中的配置项会覆盖 `fe.conf` 中相同的配置项。 - -`fe_custom.conf` 文件的位置可以在 `fe.conf` 通过 `custom_config_dir` 配置项配置。 - -## 查看配置项 - -FE 的配置项有两种方式进行查看: - -1. FE 前端页面查看 - - 在浏览器中打开 FE 前端页面 `http://fe_host:fe_http_port/variable`。在 `Configure Info` 中可以看到当前生效的 FE 配置项。 - -2. 通过命令查看 - - FE 启动后,可以在 MySQL 客户端中,通过以下命令查看 FE 的配置项: - - `ADMIN SHOW FRONTEND CONFIG;` - - 结果中各列含义如下: - - - Key:配置项名称。 - - Value:当前配置项的值。 - - Type:配置项值类型,如果整型、字符串。 - - IsMutable:是否可以动态配置。如果为 true,表示该配置项可以在运行时进行动态配置。如果false,则表示该配置项只能在 `fe.conf` 中配置并且重启 FE 后生效。 - - MasterOnly:是否为 Master FE 节点独有的配置项。如果为 true,则表示该配置项仅在 Master FE 节点有意义,对其他类型的 FE 节点无意义。如果为 false,则表示该配置项在所有 FE 节点中均有意义。 - - Comment:配置项的描述。 - -## 设置配置项 - -FE 的配置项有两种方式进行配置: - -1. 静态配置 - - 在 `conf/fe.conf` 文件中添加和设置配置项。`fe.conf` 中的配置项会在 FE 进程启动时被读取。没有在 `fe.conf` 中的配置项将使用默认值。 - -2. 通过 MySQL 协议动态配置 - - FE 启动后,可以通过以下命令动态设置配置项。该命令需要管理员权限。 - - `ADMIN SET FRONTEND CONFIG ("fe_config_name" = "fe_config_value");` - - 不是所有配置项都支持动态配置。可以通过 `ADMIN SHOW FRONTEND CONFIG;` 命令结果中的 `IsMutable` 列查看是否支持动态配置。 - - 如果是修改 `MasterOnly` 的配置项,则该命令会直接转发给 Master FE 并且仅修改 Master FE 中对应的配置项。 - - **通过该方式修改的配置项将在 FE 进程重启后失效。** - - 更多该命令的帮助,可以通过 `HELP ADMIN SET CONFIG;` 命令查看。 - -3. 通过 HTTP 协议动态配置 - - 具体请参阅 [Set Config Action](http://doris.apache.org/master/zh-CN/administrator-guide/http-actions/fe/set-config-action.html) - - 该方式也可以持久化修改后的配置项。配置项将持久化在 `fe_custom.conf` 文件中,在 FE 重启后仍会生效。 - -## 应用举例 - -1. 修改 `async_pending_load_task_pool_size` - - 通过 `ADMIN SHOW FRONTEND CONFIG;` 可以查看到该配置项不能动态配置(`IsMutable` 为 false)。则需要在 `fe.conf` 中添加: - - `async_pending_load_task_pool_size=20` - - 之后重启 FE 进程以生效该配置。 - -2. 修改 `dynamic_partition_enable` - - 通过 `ADMIN SHOW FRONTEND CONFIG;` 可以查看到该配置项可以动态配置(`IsMutable` 为 true)。并且是 Master FE 独有配置。则首先我们可以连接到任意 FE,执行如下命令修改配置: - - ```text - ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true");` - ``` - - 之后可以通过如下命令查看修改后的值: - - ```text - set forward_to_master=true; - ADMIN SHOW FRONTEND CONFIG; - ``` - - 通过以上方式修改后,如果 Master FE 重启或进行了 Master 切换,则配置将失效。可以通过在 `fe.conf` 中直接添加配置项,并重启 FE 后,永久生效该配置项。 - -3. 
修改 `max_distribution_pruner_recursion_depth` - - 通过 `ADMIN SHOW FRONTEND CONFIG;` 可以查看到该配置项可以动态配置(`IsMutable` 为 true)。并且不是 Master FE 独有配置。 - - 同样,我们可以通过动态修改配置的命令修改该配置。因为该配置不是 Master FE 独有配置,所以需要单独连接到不同的 FE,进行动态修改配置的操作,这样才能保证所有 FE 都使用了修改后的配置值 - -## 配置项列表 - -### `max_dynamic_partition_num` - -默认值:500 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -用于限制创建动态分区表时可以创建的最大分区数,避免一次创建过多分区。 数量由动态分区参数中的“开始”和“结束”决定。 - -### `grpc_max_message_size_bytes` - -默认值:1G - -用于设置 GRPC 客户端通道的初始流窗口大小,也用于设置最大消息大小。当结果集较大时,可能需要增大该值。 - -### `min_replication_num_per_tablet` - -默认值:1 - -用于设置单个tablet的最小replication数量。 - -### `max_replication_num_per_tablet` - -默认值:32767 - -用于设置单个 tablet 的最大 replication 数量。 - -### `enable_outfile_to_local` - -默认值:false - -是否允许 outfile 函数将结果导出到本地磁盘 - -### `enable_access_file_without_broker` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -此配置用于在通过代理访问 bos 或其他云存储时尝试跳过代理 - -### `enable_bdbje_debug_mode` - -默认值:false - -如果设置为 true,FE 将在 BDBJE 调试模式下启动,在 Web 页面 `System->bdbje` 可以查看相关信息,否则不可以查看 - -### `enable_alpha_rowset` - -默认值:false - -是否支持创建 alpha rowset。默认为 false,只应在紧急情况下使用,此配置应在未来的某个版本中删除 - -### `enable_http_server_v2` - -默认值:从官方 0.14.0 release 版之后默认是 true,之前默认 false - -HTTP Server V2 由 SpringBoot 实现, 并采用前后端分离的架构。只有启用 httpv2,用户才能使用新的前端 UI 界面 - -### `jetty_server_acceptors` - -默认值:2 - -### `jetty_server_selectors` - -默认值:4 - -### `jetty_server_workers` - -默认值:0 - -Jetty 的线程数量由以上三个参数控制。Jetty的线程架构模型非常简单,分为 acceptors、selectors 和 workers 三个线程池。acceptors 负责接受新连接,然后交给 selectors 处理HTTP消息协议的解包,最后由 workers 处理请求。前两个线程池采用非阻塞模型,一个线程可以处理很多 socket 的读写,所以线程池数量较小。 - -大多数项目,acceptors 线程只需要4个,selectors 线程配置4个足矣。workers 是阻塞性的业务逻辑,往往有较多的数据库操作,需要的线程数量较多,具体数量随应用程序的 QPS 和 IO 事件占比而定。QPS 越高,需要的线程数量越多,IO 占比越高,等待的线程数越多,需要的总线程数也越多。 - -workers 线程池默认不做设置,根据自己需要进行设置 - -### `jetty_threadPool_minThreads` - -Jetty线程池最小线程数,默认为20 - -### `jetty_threadPool_maxThreads` - -Jetty线程池最大线程数,默认为400 - -### `jetty_server_max_http_post_size` - -默认值:100 * 1024 * 1024 (100MB) - -这个是 put 或 post 方法上传文件的最大字节数,默认值:100MB - -### **`disable_mini_load`** - -是否禁用mini load数据导入方式,默认是:true (禁用) - -### `default_max_filter_ratio` - -默认值:0 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -可过滤数据(由于数据不规则等原因)的最大百分比。默认值为0,表示严格模式,只要数据有一条被过滤掉整个导入失败 - -### `default_db_data_quota_bytes` - -默认值:1PB - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -用于设置默认数据库数据配额大小,设置单个数据库的配额大小可以使用: - -``` -设置数据库数据量配额,单位为B/K/KB/M/MB/G/GB/T/TB/P/PB -ALTER DATABASE db_name SET DATA QUOTA quota; -查看配置 -show data (其他用法:HELP SHOW DATA) -``` - -### `default_db_replica_quota_size` - -默认值:1073741824 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -用于设置默认数据库Replica数量配额大小,设置单个数据库配额大小可以使用: - -``` -设置数据库Replica数量配额 -ALTER DATABASE db_name SET REPLICA QUOTA quota; -查看配置 -show data (其他用法:HELP SHOW DATA) -``` - -### `enable_batch_delete_by_default` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -创建唯一表时是否添加删除标志列,具体原理参照官方文档:操作手册->数据导入->批量删除 - -### `recover_with_empty_tablet` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -在某些情况下,某些 tablet 可能会损坏或丢失所有副本。 此时数据已经丢失,损坏的 tablet 会导致整个查询失败,无法查询剩余的健康 tablet。 在这种情况下,您可以将此配置设置为 true。 系统会将损坏的 tablet 替换为空 tablet,以确保查询可以执行。 (但此时数据已经丢失,所以查询结果可能不准确) - -### `max_allowed_in_element_num_of_delete` - -默认值:1024 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -用于限制 delete 语句中 Predicate 的元素个数 - -### `cache_result_max_row_count` - -默认值:3000 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:false - -设置可以缓存的最大行数,详细的原理可以参考官方文档:操作手册->分区缓存 - -### `cache_last_version_interval_second` - -默认值:900 - -是否可以动态配置:true - -是否为 Master FE 
节点独有的配置项:false - -缓存结果时上一版本的最小间隔,该参数区分离线更新和实时更新 - -### `cache_enable_partition_mode` - -默认值:true - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:false - -如果设置为 true,FE 将从 BE cache 中获取数据,该选项适用于部分分区的实时更新。 - -### `cache_enable_sql_mode` - -默认值:true - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:false - -如果设置为 true,FE 会启用 sql 结果缓存,该选项适用于离线数据更新场景 - -| | case1 | case2 | case3 | case4 | -| ---------------------- | ----- | ----- | ----- | ----- | -| enable_sql_cache | false | true | true | false | -| enable_partition_cache | false | false | true | true | - -### `min_clone_task_timeout_sec` 和 `max_clone_task_timeout_sec` - -默认值:最小3分钟,最大两小时 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -`min_clone_task_timeout_sec` 和 `max_clone_task_timeout_sec` 用于限制克隆任务的最小和最大超时间。 一般情况下,克隆任务的超时时间是通过数据量和最小传输速度(5MB/s)来估计的。 但在特殊情况下,您可能需要手动设置这两个配置,以确保克隆任务不会因超时而失败。 - -### `agent_task_resend_wait_time_ms` - -默认值:5000 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -当代理任务的创建时间被设置的时候,此配置将决定是否重新发送代理任务, 当且仅当当前时间减去创建时间大于 `agent_task_task_resend_wait_time_ms` 时,ReportHandler可以重新发送代理任务。 - -该配置目前主要用来解决 `PUBLISH_VERSION` 代理任务的重复发送问题, 目前该配置的默认值是5000,是个实验值,由于把代理任务提交到代理任务队列和提交到 BE 存在一定的时间延迟,所以调大该配置的值可以有效解决代理任务的重复发送问题, - -但同时会导致提交失败或者执行失败的代理任务再次被执行的时间延长。 - -### `enable_odbc_table` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -是否启用 ODBC 表,默认不启用,在使用的时候需要手动配置启用,该参数可以通过: - -`ADMIN SET FRONTEND CONFIG("key"="value") `方式进行设置 - -### `enable_spark_load` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -是否临时启用 spark load,默认不启用 - -### `disable_storage_medium_check` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果 disable_storage_medium_check 为true, ReportHandler 将不会检查 tablet 的存储介质, 并使得存储冷却功能失效,默认值为false。当您不关心 tablet 的存储介质是什么时,可以将值设置为true 。 - -### `drop_backend_after_decommission` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -该配置用于控制系统在成功下线(Decommission) BE 后,是否 Drop 该 BE。如果为 true,则在 BE 成功下线后,会删除掉该 BE 节点。如果为 false,则在 BE 成功下线后,该 BE 会一直处于 DECOMMISSION 状态,但不会被删除。 - -该配置在某些场景下可以发挥作用。假设一个 Doris 集群的初始状态为每个 BE 节点有一块磁盘。运行一段时间后,系统进行了纵向扩容,即每个 BE 节点新增2块磁盘。因为 Doris 当前还不支持 BE 内部各磁盘间的数据均衡,所以会导致初始磁盘的数据量可能一直远高于新增磁盘的数据量。此时我们可以通过以下操作进行人工的磁盘间均衡: - -1. 将该配置项置为 false。 -2. 对某一个 BE 节点,执行 decommission 操作,该操作会将该 BE 上的数据全部迁移到其他节点中。 -3. decommission 操作完成后,该 BE 不会被删除。此时,取消掉该 BE 的 decommission 状态。则数据会开始从其他 BE 节点均衡回这个节点。此时,数据将会均匀的分布到该 BE 的所有磁盘上。 -4. 
对所有 BE 节点依次执行 2,3 两个步骤,最终达到所有节点磁盘均衡的目的。 - -### `period_of_auto_resume_min` - -默认值:5 (s) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -自动恢复 Routine load 的周期 - -### `max_tolerable_backend_down_num` - -默认值:0 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -只要有一个BE宕机,Routine Load 就无法自动恢复 - -### `enable_materialized_view` - -默认值:true - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -该配置用于开启和关闭创建物化视图功能。如果设置为 true,则创建物化视图功能开启。用户可以通过 `CREATE MATERIALIZED VIEW` 命令创建物化视图。如果设置为 false,则无法创建物化视图。 - -如果在创建物化视图的时候报错 `The materialized view is coming soon` 或 `The materialized view is disabled` 则说明改配置被设置为了 false,创建物化视图功能关闭了。可以通过修改配置为 true 来启动创建物化视图功能。 - -该变量为动态配置,用户可以在 FE 进程启动后,通过命令修改配置。也可以通过修改 FE 的配置文件,重启 FE 来生效 - -### `check_java_version` - -默认值:true - -Doris 将检查已编译和运行的 Java 版本是否兼容,如果不兼容将抛出Java版本不匹配的异常信息,并终止启动 - -### `max_running_rollup_job_num_per_table` - -默认值:1 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -控制 Rollup 作业并发限制 - -### `dynamic_partition_enable` - -默认值:true - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -是否启用动态分区,默认启用 - -### `dynamic_partition_check_interval_seconds` - -默认值:600秒,10分钟 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -检查动态分区的频率 - -### `disable_cluster_feature` - -默认值:true - -是否可以动态配置:true - -多集群功能将在 0.12 版本中弃用 ,将此配置设置为 true 将禁用与集群功能相关的所有操作,包括: - -1. 创建/删除集群 -2. 添加、释放BE/将BE添加到集群/停用集群balance -3. 更改集群的后端数量 -4. 链接/迁移数据库 - -### `force_do_metadata_checkpoint` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果设置为 true,则无论 jvm 内存使用百分比如何,检查点线程都会创建检查点 - -### `metadata_checkpoint_memory_threshold` - -默认值:60 (60%) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果 jvm 内存使用百分比(堆或旧内存池)超过此阈值,则检查点线程将无法工作以避免 OOM。 - -### `max_distribution_pruner_recursion_depth` - -默认值:100 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:false - -这将限制哈希分布修剪器的最大递归深度。 例如:其中 a in(5 个元素)和 b in(4 个元素)和 c in(3 个元素)和 d in(2 个元素)。 a/b/c/d 是分布式列,所以递归深度为 5 * 4 * 3 * 2 = 120,大于 100, 因此该分发修剪器将不起作用,只会返回所有 buckets。 增加深度可以支持更多元素的分布修剪,但可能会消耗更多的 CPU - -通过 `ADMIN SHOW FRONTEND CONFIG;` 可以查看到该配置项可以动态配置(`IsMutable` 为 true)。并且不是 Master FE 独有配置。 - -同样,我们可以通过动态修改配置的命令修改该配置。因为该配置不是 Master FE 独有配置,所以需要单独连接到不同的 FE,进行动态修改配置的操作,这样才能保证所有 FE 都使用了修改后的配置值 - - -### `using_old_load_usage_pattern` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果设置为 true,处理错误的 insert stmt 仍将返回一个标签给用户。 用户可以使用此标签来检查加载作业的状态。 默认值为 false,表示插入操作遇到错误,不带加载标签,直接抛出异常给用户客户端。 - -### `small_file_dir` - -默认值:DORIS_HOME_DIR + “/small_files” - -保存小文件的目录 - -### `max_small_file_size_bytes` - -默认值:1M - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -SmallFileMgr 中单个文件存储的最大大小 - -### `max_small_file_number` - -默认值:100 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -SmallFileMgr 中存储的最大文件数 - -### `max_routine_load_task_num_per_be` - -默认值:5 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -每个 BE 的最大并发例 Routine Load 任务数。 这是为了限制发送到 BE 的 Routine Load 任务的数量,并且它也应该小于 BE config `routine_load_thread_pool_size`(默认 10),这是 BE 上的 Routine Load 任务线程池大小。 - -### `max_routine_load_task_concurrent_num` - -默认值:5 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -单个 Routine Load 作业的最大并发任务数 - -### `max_routine_load_job_num` - -默认值:100 - -最大 Routine Load 作业数,包括 NEED_SCHEDULED, RUNNING, PAUSE - -### `max_backup_restore_job_num_per_db` - -默认值:10 - -此配置用于控制每个 DB 能够记录的 backup/restore 任务的数量 - -### `max_running_txn_num_per_db` - -默认值:100 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -这个配置主要是用来控制同一个 DB 的并发导入个数的。 - -当集群中有过多的导入任务正在运行时,新提交的导入任务可能会报错: - -```text -current running txns on db xxx is xx, larger than limit xx -``` - 
-该遇到该错误时,说明当前集群内正在运行的导入任务超过了该配置值。此时建议在业务侧进行等待并重试导入任务。 - -一般来说不推荐增大这个配置值。过高的并发数可能导致系统负载过大 - -### `enable_metric_calculator` - -默认值:true - -如果设置为 true,指标收集器将作为守护程序计时器运行,以固定间隔收集指标 - -### `report_queue_size` - -默认值: 100 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -这个阈值是为了避免在 FE 中堆积过多的报告任务,可能会导致 OOM 异常等问题。 并且每个 BE 每 1 分钟会报告一次 tablet 信息,因此无限制接收报告是不可接受的。以后我们会优化 tablet 报告的处理速度 - -**不建议修改这个值** - -### `partition_rebalance_max_moves_num_per_selection` - -默认值:10 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -仅在使用 PartitionRebalancer 时有效 , - -### `partition_rebalance_move_expire_after_access` - -默认值:600 (s) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -仅在使用 PartitionRebalancer 时有效。 如果更改,缓存的移动将被清除 - -### tablet_rebalancer_type - -默认值:BeLoad - -是否为 Master FE 节点独有的配置项:true - -rebalancer 类型(忽略大小写):BeLoad、Partition。 如果类型解析失败,默认使用 BeLoad - -### `max_balancing_tablets` - -默认值:100 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果 TabletScheduler 中的 balance tablet 数量超过 `max_balancing_tablets`,则不再进行 balance 检查 - -### `max_scheduling_tablets` - -默认值:2000 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果 TabletScheduler 中调度的 tablet 数量超过 `max_scheduling_tablets`, 则跳过检查。 - -### `disable_balance` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果设置为 true,TabletScheduler 将不会做 balance - -### `balance_load_score_threshold` - -默认值:0.1 (10%) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -集群 balance 百分比的阈值,如果一个BE的负载分数比平均分数低10%,这个后端将被标记为低负载,如果负载分数比平均分数高10%,将被标记为高负载。 - -### `schedule_slot_num_per_path` - -默认值:2 - -tablet 调度程序中每个路径的默认 slot 数量 - -### `tablet_repair_delay_factor_second` - -默认值:60 (s) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -决定修复 tablet 前的延迟时间因素。 - -1. 如果优先级为 VERY_HIGH,请立即修复。 -2. HIGH,延迟 tablet_repair_delay_factor_second * 1; -3. 正常:延迟 tablet_repair_delay_factor_second * 2; -4. 低:延迟 tablet_repair_delay_factor_second * 3; - -### `es_state_sync_interval_second` - -默认值:10 - -FE 会在每隔 es_state_sync_interval_secs 调用 es api 获取 es 索引分片信息 - -### `disable_hadoop_load` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -默认不禁用,将来不推荐使用 hadoop 集群 load。 设置为 true 以禁用这种 load 方式。 - -### `db_used_data_quota_update_interval_secs` - -默认值:300 (s) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -一个主守护线程将每 `db_used_data_quota_update_interval_secs` 更新数据库 txn 管理器的数据库使用数据配额 - -为了更好的数据导入性能,在数据导入之前的数据库已使用的数据量是否超出配额的检查中,我们并不实时计算数据库已经使用的数据量,而是获取后台线程周期性更新的值。 - -该配置用于设置更新数据库使用的数据量的值的时间间隔 - -### `disable_load_job` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -不禁用,如果这设置为 true - -- 调用开始 txn api 时,所有挂起的加载作业都将失败 -- 调用 commit txn api 时,所有准备加载作业都将失败 -- 所有提交的加载作业将等待发布 - -### `catalog_try_lock_timeout_ms` - -默认值:5000 (ms) - -是否可以动态配置:true - -元数据锁的 tryLock 超时配置。 通常它不需要改变,除非你需要测试一些东西。 - -### `max_query_retry_time` - -默认值:1 - -是否可以动态配置:true - -查询重试次数。 如果我们遇到 RPC 异常并且没有将结果发送给用户,则可能会重试查询。 您可以减少此数字以避免雪崩灾难。 - -### `remote_fragment_exec_timeout_ms` - -默认值:5000 (ms) - -是否可以动态配置:true - -异步执行远程 fragment 的超时时间。 在正常情况下,异步远程 fragment 将在短时间内执行。 如果系统处于高负载状态,请尝试将此超时设置更长的时间。 - -### `enable_local_replica_selection` - -默认值:false - -是否可以动态配置:true - -如果设置为 true,Planner 将尝试在与此前端相同的主机上选择 tablet 的副本。 -在以下情况下,这可能会减少网络传输: - -1. N 个主机,部署了 N 个 BE 和 N 个 FE。 - -2. 数据有N个副本。 - -3. 
高并发查询均匀发送到所有 Frontends - -在这种情况下,所有 Frontends 只能使用本地副本进行查询。如果想当本地副本不可用时,使用非本地副本服务查询,请将 enable_local_replica_selection_fallback 设置为 true - -### `enable_local_replica_selection_fallback` - -默认值:false - -是否可以动态配置:true - -与 enable_local_replica_selection 配合使用,当本地副本不可用时,使用非本地副本服务查询。 - -### `max_unfinished_load_job` - -默认值:1000 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -最大加载任务数,包括 PENDING、ETL、LOADING、QUORUM_FINISHED。 如果超过此数量,则不允许提交加载作业。 - -### `max_bytes_per_broker_scanner` - -默认值:3 * 1024 * 1024 * 1024L (3G) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -broker scanner 程序可以在一个 broker 加载作业中处理的最大字节数。 通常,每个 BE 都有一个 broker scanner 程序。 - -### `enable_auth_check` - -默认值:true - -如果设置为 false,则身份验证检查将被禁用,以防新权限系统出现问题。 - -### `tablet_stat_update_interval_second` - -默认值:300,(5分钟) - -tablet 状态更新间隔 -所有 FE 将在每个时间间隔从所有 BE 获取 tablet 统计信息 - -### `storage_flood_stage_usage_percent ` - -默认值:95 (95%) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -### ` storage_flood_stage_left_capacity_bytes` - -默认值: - - storage_flood_stage_usage_percent : 95 (95%) - - storage_flood_stage_left_capacity_bytes : 1 * 1024 * 1024 * 1024 (1GB) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果磁盘容量达到 `storage_flood_stage_usage_percent` 和 `storage_flood_stage_left_capacity_bytes` 以下操作将被拒绝: - -1. load 作业 -2. restore 工作 - -### `storage_high_watermark_usage_percent` - -默认值:85 (85%) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -### `storage_min_left_capacity_bytes` - -默认值: 2 * 1024 * 1024 * 1024 (2GB) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -`storage_high_watermark_usage_percent` 限制 BE 端存储路径使用最大容量百的分比。 `storage_min_left_capacity_bytes`限制 BE 端存储路径的最小剩余容量。 如果达到这两个限制,则不能选择此存储路径作为 tablet 存储目的地。 但是对于 tablet 恢复,我们可能会超过这些限制以尽可能保持数据完整性。 - -### `backup_job_default_timeout_ms` - -默认值:86400 * 1000 (1天) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -备份作业的默认超时时间 - -### `with_k8s_certs` - -默认值:false - -如果在本地使用 k8s 部署管理器,请将其设置为 true 并准备证书文件 - -### `dpp_hadoop_client_path` - -默认值:/lib/hadoop-client/hadoop/bin/hadoop - -### `dpp_bytes_per_reduce` - -默认值:100 * 1024 * 1024L (100M) - -### `dpp_default_cluster` - -默认值:palo-dpp - -### `dpp_default_config_str` - -默认值:{ - hadoop_configs : 'mapred.job.priority=NORMAL;mapred.job.map.capacity=50;mapred.job.reduce.capacity=50;mapred.hce.replace.streaming=false;abaci.long.stored.job=true;dce.shuffle.enable=false;dfs.client.authserver.force_stop=true;dfs.client.auth.method=0' - } - -### dpp_config_str - -默认值:{ - palo-dpp : { - hadoop_palo_path : '/dir', - hadoop_configs : 'fs.default.name=hdfs://host:port;mapred.job.tracker=host:port;hadoop.job.ugi=user,password' - } - } - -### `enable_deploy_manager` - -默认值:disable - -如果使用第三方部署管理器部署 Doris,则设置为 true - -有效的选项是: - -- disable:没有部署管理器 -- k8s:Kubernetes -- ambari:Ambari -- local:本地文件(用于测试或 Boxer2 BCC 版本) - -### `enable_token_check` - -默认值:true - -为了向前兼容,稍后将被删除。 下载image文件时检查令牌。 - -### `expr_depth_limit` - -默认值:3000 - -是否可以动态配置:true - -限制 expr 树的深度。 超过此限制可能会导致在持有 db read lock 时分析时间过长。 - -### `expr_children_limit` - -默认值:10000 - -是否可以动态配置:true - -限制 expr 树的 expr 子节点的数量。 超过此限制可能会导致在持有数据库读锁时分析时间过长。 - -### `proxy_auth_magic_prefix` - -默认值:x@8 - -### `proxy_auth_enable` - -默认值:false - -### `meta_publish_timeout_ms` - -默认值:1000ms - -默认元数据发布超时时间 - -### `disable_colocate_balance` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -此配置可以设置为 true 以禁用自动 colocate 表的重新定位和平衡。 如果 `disable_colocate_balance'`设置为 true,则 ColocateTableBalancer 将不会重新定位和平衡并置表。 - -**注意:** - -1. 一般情况下,根本不需要关闭平衡。 -2. 因为一旦关闭平衡,不稳定的 colocate 表可能无法恢复 -3. 
最终查询时无法使用 colocate 计划。 - -### `query_colocate_join_memory_limit_penalty_factor` - -默认值:1 - -是否可以动态配置:true - -colocote join PlanFragment instance 的 memory_limit = exec_mem_limit / min (query_colocate_join_memory_limit_penalty_factor, instance_num) - -### `max_connection_scheduler_threads_num` - -默认值:4096 - -查询请求调度器中的最大线程数。 - -前的策略是,有请求过来,就为其单独申请一个线程进行服务 - -### `qe_max_connection` - -默认值:1024 - -每个 FE 的最大连接数 - -### `check_consistency_default_timeout_second` - -默认值:600 (10分钟) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -单个一致性检查任务的默认超时。 设置足够长以适合您的tablet大小。 - -### `consistency_check_start_time` - -默认值:23 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -一致性检查开始时间 - -一致性检查器将从 `consistency_check_start_time` 运行到 `consistency_check_end_time`。 默认为 23:00 至 04:00 - -### `consistency_check_end_time` - -默认值:04 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -一致性检查结束时间 - -一致性检查器将从 `consistency_check_start_time` 运行到 `consistency_check_end_time`。 默认为 23:00 至 04:00 - -### `export_tablet_num_per_task` - -默认值:5 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -每个导出查询计划的 tablet 数量 - -### `export_task_default_timeout_second` - -默认值:2 * 3600 (2小时) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -导出作业的默认超时时间 - -### `export_running_job_num_limit` - -默认值:5 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -运行导出作业的并发限制,默认值为 5,0 表示无限制 - -### `export_checker_interval_second` - -默认值:5 - -导出检查器的运行间隔 - -### `default_load_parallelism` - -默认值:1 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -单个节点broker load导入的默认并发度。 -如果用户在提交broker load任务时,在properties中自行指定了并发度,则采用用户自定义的并发度。 -此参数将与`max_broker_concurrency`、`min_bytes_per_broker_scanner`等多个配置共同决定导入任务的并发度。 - -### `max_broker_concurrency` - -默认值:10 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -broker scanner 的最大并发数。 - -### `min_bytes_per_broker_scanner` - -默认值:67108864L (64M) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -单个 broker scanner 将读取的最小字节数。 - -### `catalog_trash_expire_second` - -默认值:86400L (1天) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -删除数据库(表/分区)后,您可以使用 RECOVER stmt 恢复它。 这指定了最大数据保留时间。 一段时间后,数据将被永久删除。 - -### `storage_cooldown_second` - -默认值:30 * 24 * 3600L (30天) - -创建表(或分区)时,可以指定其存储介质(HDD 或 SSD)。 如果设置为 SSD,这将指定tablet在 SSD 上停留的默认时间。 之后,tablet将自动移动到 HDD。 您可以在 `CREATE TABLE stmt` 中设置存储冷却时间。 - -### `default_storage_medium` - -默认值:HDD - -创建表(或分区)时,可以指定其存储介质(HDD 或 SSD)。 如果未设置,则指定创建时的默认介质。 - -### `max_backend_down_time_second` - -默认值:3600 (1小时) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果 BE 关闭了 `max_backend_down_time_second`,将触发 BACKEND_DOWN 事件。 - -### `alter_table_timeout_second` - -默认值:86400 (1天) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -ALTER TABLE 请求的最大超时时间。 设置足够长以适合您的表格数据大小 - -### `capacity_used_percent_high_water` - -默认值:0.75 (75%) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -磁盘容量的高水位使用百分比。 这用于计算后端的负载分数 - -### `clone_distribution_balance_threshold` - -默认值:0.2 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -BE副本数的平衡阈值。 - -### `clone_capacity_balance_threshold` - -默认值:0.2 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -* BE 中数据大小的平衡阈值。 - - 平衡算法为: - - 1. 计算整个集群的平均使用容量(AUC)(总数据大小/BE数) - - 2. 高水位为(AUC * (1 + clone_capacity_balance_threshold)) - - 3. 
低水位为(AUC * (1 - clone_capacity_balance_threshold)) - -克隆检查器将尝试将副本从高水位 BE 移动到低水位 BE。 - -### `replica_delay_recovery_second` - -默认值:0 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -副本之间的最小延迟秒数失败,并且尝试使用克隆来恢复它。 - -### `clone_high_priority_delay_second` - -默认值:0 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -高优先级克隆作业的延迟触发时间 - -### `clone_normal_priority_delay_second` - -默认值:300 (5分钟) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -正常优先级克隆作业的延迟触发时间 - -### `clone_low_priority_delay_second` - -默认值:600 (10分钟) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -低优先级克隆作业的延迟触发时间。 克隆作业包含需要克隆(恢复或迁移)的tablet。 如果优先级为 LOW,则会延迟 `clone_low_priority_delay_second `,在作业创建之后然后被执行。 这是为了避免仅因为主机短时间停机而同时运行大量克隆作业。 - -注意这个配置(还有 `clone_normal_priority_delay_second`) 如果它小于 `clone_checker_interval_second` 将不起作用 - -### `clone_max_job_num` - -默认值:100 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -低优先级克隆作业的并发数。 高优先级克隆作业的并发性目前是无限的。 - -### `clone_job_timeout_second` - -默认值:7200 (2小时) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -单个克隆作业的默认超时。 设置足够长以适合您的副本大小。 副本数据越大,完成克隆所需的时间就越多 - -### `clone_checker_interval_second` - -默认值:300 (5分钟) - -克隆检查器的运行间隔 - -### `tablet_delete_timeout_second` - -默认值:2 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -与 `tablet_create_timeout_second` 含义相同,但在删除 tablet 时使用 - -### `async_loading_load_task_pool_size` - -默认值:10 - -是否可以动态配置:false - -是否为 Master FE 节点独有的配置项:true - -`loading_load`任务执行程序池大小。 该池大小限制了正在运行的最大 `loading_load`任务数。 - -当前,它仅限制 `broker load`的 `loading_load`任务的数量。 - -### `async_pending_load_task_pool_size` - -默认值:10 - -是否可以动态配置:false - -是否为 Master FE 节点独有的配置项:true - -`pending_load`任务执行程序池大小。 该池大小限制了正在运行的最大 `pending_load`任务数。 - -当前,它仅限制 `broker load`和 `spark load`的 `pending_load`任务的数量。 - -它应该小于 `max_running_txn_num_per_db`的值 - -### `async_load_task_pool_size` - -默认值:10 - -是否可以动态配置:false - -是否为 Master FE 节点独有的配置项:true - -此配置只是为了兼容旧版本,此配置已被 `async_loading_load_task_pool_size`取代,以后会被移除。 - -### `disable_show_stream_load` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -是否禁用显示 stream load 并清除内存中的 stream load 记录。 - -### `max_stream_load_record_size` - -默认值:5000 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -可以存储在内存中的最近 stream load 记录的默认最大数量 - -### `fetch_stream_load_record_interval_second` - -默认值:120 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -获取 stream load 记录间隔 - -### `desired_max_waiting_jobs` - -默认值:100 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -routine load V2 版本加载的默认等待作业数 ,这是一个理想的数字。 在某些情况下,例如切换 master,当前数量可能超过` desired_max_waiting_jobs` - -### `yarn_config_dir` - -默认值:PaloFe.DORIS_HOME_DIR + "/lib/yarn-config" - - -默认的 Yarn 配置文件目录每次运行 Yarn 命令之前,我们需要检查一下这个路径下是否存在 config 文件,如果不存在,则创建它们。 - - -### `yarn_client_path` - -默认值:PaloFe.DORIS_HOME_DIR + "/lib/yarn-client/hadoop/bin/yarn" - -默认 Yarn 客户端路径 - -### `spark_launcher_log_dir` - -默认值: sys_log_dir + "/spark_launcher_log" - -指定的 Spark 启动器日志目录 - -### `spark_resource_path` - -默认值:空 - -默认值的 Spark 依赖路径 - -### `spark_home_default_dir` - -默认值:PaloFe.DORIS_HOME_DIR + "/lib/spark2x" - -默认的 Spark home 路径 - -### `spark_load_default_timeout_second` - -默认值:86400 (1天) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -默认 Spark 加载超时时间 - -### `spark_dpp_version` - -默认值:1.0.0 - -Spark 默认版本号 - -### `hadoop_load_default_timeout_second` - -默认值:86400 * 3 (3天) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -Hadoop 加载超时时间 - -### `min_load_timeout_second` - -默认值:1 (1秒) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -mini load 超时时间,适用于所有类型的加载 - -### `max_stream_load_timeout_second` - -默认值:259200 (3天) - 
-是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -stream load 和 mini load 最大超时时间 - -### `max_load_timeout_second` - -默认值:259200 (3天) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -load 最大超时时间,适用于除 stream load 之外的所有类型的加载 - -### `stream_load_default_timeout_second` - -默认值:600 (s) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -默认 stream load 和 mini load 超时时间 - -### `insert_load_default_timeout_second` - -默认值:3600 (1小时) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -默认 insert load 超时时间 - -### `mini_load_default_timeout_second` - -默认值:3600 (1小时) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -默认非 stream load 类型的 mini load 的超时时间 - -### `broker_load_default_timeout_second` - -默认值:14400 (4小时) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -Broker load 的默认超时时间 - -### `load_running_job_num_limit` - -默认值:0 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -Load 任务数量限制,默认0,无限制 - -### `load_input_size_limit_gb` - -默认值:0 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -Load 作业输入的数据大小,默认是0,无限制 - -### `delete_thread_num` - -默认值:10 - -删除作业的并发线程数 - -### `load_etl_thread_num_normal_priority` - -默认值:10 - -NORMAL 优先级 etl 加载作业的并发数。 - -### `load_etl_thread_num_high_priority` - -默认值:3 - -高优先级 etl 加载作业的并发数。 - -### `load_pending_thread_num_normal_priority` - -默认值:10 - -NORMAL 优先级挂起加载作业的并发数。 - -### `load_pending_thread_num_high_priority` - -默认值:3 - -高优先级挂起加载作业的并发数。 加载作业优先级定义为 HIGH 或 NORMAL。 所有小批量加载作业都是 HIGH 优先级,其他类型的加载作业是 NORMAL 优先级。 设置优先级是为了避免慢加载作业长时间占用线程。 这只是内部优化的调度策略。 目前,您无法手动指定作业优先级。 - -### `load_checker_interval_second` - -默认值:5 (s) - -负载调度器运行间隔。 加载作业将其状态从 PENDING 转移到 LOADING 到 FINISHED。 加载调度程序将加载作业从 PENDING 转移到 LOADING 而 txn 回调会将加载作业从 LOADING 转移到 FINISHED。 因此,当并发未达到上限时,加载作业最多需要一个时间间隔才能完成。 - -### `max_layout_length_per_row` - -默认值:100000 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -一行的最大内存布局长度。 默认为 100 KB。 -在 BE 中,RowBlock 的最大大小为 100MB(在 be.conf 中配置为 `max_unpacked_row_block_size `)。 -每个 RowBlock 包含 1024 行。 因此,一行的最大大小约为 100 KB。 - -例如。 -schema:k1(int), v1(decimal), v2(varchar(2000)) -那么一行的内存布局长度为:4(int) + 16(decimal) + 2000(varchar) = 2020 (Bytes) - -查看所有类型的内存布局长度,在 mysql-client 中运行 `help create table`。 - -如果要增加此数字以支持一行中的更多列,则还需要增加 -be.conf 中的 `max_unpacked_row_block_size `,但性能影响未知。 - -### `load_straggler_wait_second` - -默认值:300 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -负载中落后节点的最大等待秒数 -例如:有 3 个副本 A, B, C load 已经在 t1 时仲裁完成 (A,B) 并且 C 没有完成,如果 (current_time-t1)> 300s,那么 doris会将 C 视为故障节点,将调用事务管理器提交事务并告诉事务管理器 C 失败。 - -这也用于等待发布任务时 - -**注意:**这个参数是所有作业的默认值,DBA 可以为单独的作业指定它 - -### `thrift_server_type` - -该配置表示FE的Thrift服务使用的服务模型, 类型为string, 大小写不敏感。 - -若该参数为 `SIMPLE`, 则使用 `TSimpleServer` 模型, 该模型一般不适用于生产环境,仅限于测试使用。 - -若该参数为 `THREADED`, 则使用 `TThreadedSelectorServer` 模型,该模型为非阻塞式I/O模型,即主从 Reactor 模型,该模型能及时响应大量的并发连接请求,在多数场景下有较好的表现。 - -若该参数为 `THREAD_POOL`, 则使用 `TThreadPoolServer` 模型,该模型为阻塞式I/O模型,使用线程池处理用户连接,并发连接数受限于线程池的数量,如果能提前预估并发请求的数量,并且能容忍足够多的线程资源开销,该模型会有较好的性能表现,默认使用该服务模型 - -### `thrift_server_max_worker_threads` - -默认值:4096 - -Thrift Server最大工作线程数 - -### `publish_version_interval_ms` - -默认值:10 (ms) - -两个发布版本操作之间的最小间隔 - -### `publish_version_timeout_second` - -默认值:30 (s) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -一个事务的所有发布版本任务完成的最大等待时间 - -### `max_create_table_timeout_second` - -默认值:60 (s) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -为了在创建表(索引)不等待太久,设置一个最大超时时间 - -### `tablet_create_timeout_second` - -默认值:1(s) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -创建单个副本的最长等待时间。 -例如。 -如果您为每个表创建一个包含 m 个 tablet 和 n 个副本的表, -创建表请求将在超时前最多运行 (m * n * tablet_create_timeout_second)。 - -### 
`max_mysql_service_task_threads_num` - -默认值:4096 - -mysql 中处理任务的最大线程数。 - -### `cluster_id` - -默认值:-1 - -如果节点(FE 或 BE)具有相同的集群 id,则将认为它们属于同一个Doris 集群。 Cluster id 通常是主 FE 首次启动时生成的随机整数。 您也可以指定一个。 - -### `auth_token` - -默认值:空 - -用于内部身份验证的集群令牌。 - -### `cluster_name` - -默认值: Apache doris - -集群名称,将显示为网页标题 - -### `mysql_service_io_threads_num` - -默认值:4 - -mysql 中处理 io 事件的线程数。 - -### `mysql_service_nio_enabled` - -默认值:true - -mysql 服务 nio 选项是否启用,默认启用 - -### `query_port` - -默认值:9030 - -Doris FE 通过 mysql 协议查询连接端口 - -### `rewrite_count_distinct_to_bitmap_hll` - -默认值:true - -该变量为 session variable,session 级别生效。 - -- 类型:boolean -- 描述:**仅对于 AGG 模型的表来说**,当变量为 true 时,用户查询时包含 count(distinct c1) 这类聚合函数时,如果 c1 列本身类型为 bitmap,则 count distnct 会改写为 bitmap_union_count(c1)。 当 c1 列本身类型为 hll,则 count distinct 会改写为 hll_union_agg(c1) 如果变量为 false,则不发生任何改写。 - -### `rpc_port` - -默认值:9020 - -FE Thrift Server的端口 - -### `thrift_backlog_num` - -默认值:1024 - -thrift 服务器的 backlog_num 当你扩大这个 backlog_num 时,你应该确保它的值大于 linux `/proc/sys/net/core/somaxconn` 配置 - -### `thrift_client_timeout_ms` - -默认值:0 - -thrift 服务器的连接超时和套接字超时配置 thrift_client_timeout_ms 的默认值设置为零以防止读取超时 - -### `mysql_nio_backlog_num` - -默认值:1024 - -mysql nio server 的 backlog_num 当你放大这个 backlog_num 时,你应该同时放大 linux `/proc/sys/net/core/somaxconn`文件中的值 - -### `http_backlog_num` - -默认值:1024 - -netty http server 的 backlog_num 当你放大这个 backlog_num 时,你应该同时放大 linux `/proc/sys/net/core/somaxconn`文件中的值 - -### `http_max_line_length` - -默认值:4096 - -HTTP 服务允许接收请求的 URL 的最大长度,单位为比特 - -### `http_max_header_size` - -默认值:8192 - -HTTP 服务允许接收请求的 Header 的最大长度,单位为比特 - -### `http_max_chunk_size` - -默认值:8192 - -http 上下文 chunk 块的最大尺寸 - -### `http_port` - -默认值:8030 - -FE http 端口,当前所有 FE http 端口都必须相同 - -### `max_bdbje_clock_delta_ms` - -默认值:5000 (5秒) - -设置非主 FE 到主 FE 主机之间的最大可接受时钟偏差。 每当非主 FE 通过 BDBJE 建立到主 FE 的连接时,都会检查该值。 如果时钟偏差大于此值,则放弃连接。 - -### `ignore_meta_check` - -默认值:false - -是否可以动态配置:true - -如果为 true,非主 FE 将忽略主 FE 与其自身之间的元数据延迟间隙,即使元数据延迟间隙超过 `meta_delay_toleration_second`。 非主 FE 仍将提供读取服务。 当您出于某种原因尝试停止 Master FE 较长时间,但仍希望非 Master FE 可以提供读取服务时,这会很有帮助。 - -### `metadata_failure_recovery` - -默认值:false - -如果为 true,FE 将重置 bdbje 复制组(即删除所有可选节点信息)并应该作为 Master 启动。 如果所有可选节点都无法启动,我们可以将元数据复制到另一个节点并将此配置设置为 true 以尝试重新启动 FE。 - -### `priority_networks` - -默认值:空 - -为那些有很多 ip 的服务器声明一个选择策略。 请注意,最多应该有一个 ip 与此列表匹配。 这是一个以分号分隔格式的列表,用 CIDR 表示法,例如 10.10.10.0/24。 如果没有匹配这条规则的ip,会随机选择一个。 - -### `txn_rollback_limit` - -默认值:100 - -尝试重新加入组时 bdbje 可以回滚的最大 txn 数 - -### `max_agent_task_threads_num` - -默认值:4096 - -是否为 Master FE 节点独有的配置项:true - -代理任务线程池中处理代理任务的最大线程数。 - -### `heartbeat_mgr_blocking_queue_size` - -默认值:1024 - -是否为 Master FE 节点独有的配置项:true - -在 heartbeat_mgr 中存储心跳任务的阻塞队列大小。 - -### `heartbeat_mgr_threads_num` - -默认值:8 - -是否为 Master FE 节点独有的配置项:true - -heartbeat _mgr 中处理心跳事件的线程数。 - -### `bdbje_replica_ack_timeout_second` - -默认值:10 - -元数据会同步写入到多个 Follower FE,这个参数用于控制 Master FE 等待 Follower FE 发送 ack 的超时时间。当写入的数据较大时,可能 ack 时间较长,如果超时,会导致写元数据失败,FE 进程退出。此时可以适当调大这个参数。 - -### `bdbje_lock_timeout_second` - -默认值:1 - -bdbje 操作的 lock timeout 如果 FE WARN 日志中有很多 LockTimeoutException,可以尝试增加这个值 - -### `bdbje_heartbeat_timeout_second` - -默认值:30 - -master 和 follower 之间 bdbje 的心跳超时。 默认为 30 秒,与 bdbje 中的默认值相同。 如果网络遇到暂时性问题,一些意外的长 Java GC 使您烦恼,您可以尝试增加此值以减少错误超时的机会 - -### `replica_ack_policy` - -默认值:SIMPLE_MAJORITY - -选项:ALL, NONE, SIMPLE_MAJORITY - -bdbje 的副本 ack 策略。 更多信息,请参见:http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.ReplicaAckPolicy.html - -### `replica_sync_policy` - -默认值:SYNC - -选项:SYNC, NO_SYNC, WRITE_NO_SYNC - 
-bdbje 的Follower FE 同步策略。 - -### `master_sync_policy` - -默认值:SYNC - -选项:SYNC, NO_SYNC, WRITE_NO_SYNC - -Master FE 的 bdbje 同步策略。 如果您只部署一个 Follower FE,请将其设置为“SYNC”。 如果你部署了超过 3 个 Follower FE,你可以将这个和下面的 `replica_sync_policy ` 设置为 WRITE_NO_SYNC。 更多信息,参见:http://docs.oracle.com/cd/E17277_02/html/java/com/sleepycat/je/Durability.SyncPolicy.html - -### `meta_delay_toleration_second` - -默认值:300 (5分钟) - -如果元数据延迟间隔超过 `meta_delay_toleration_second `,非主 FE 将停止提供服务 - -### `edit_log_roll_num` - -默认值:50000 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -Master FE will save image every `edit_log_roll_num ` meta journals.。 - -### `edit_log_port` - -默认值:9010 - -bdbje端口 - -### `edit_log_type` - -默认值:BDB - -编辑日志类型。 -BDB:将日志写入 bdbje -LOCAL:已弃用。 - -### `tmp_dir` - -默认值:PaloFe.DORIS_HOME_DIR + "/temp_dir" - -temp dir 用于保存某些过程的中间结果,例如备份和恢复过程。 这些过程完成后,将清除此目录中的文件。 - -### `meta_dir` - -默认值:PaloFe.DORIS_HOME_DIR + "/doris-meta" - -Doris 元数据将保存在这里。 强烈建议将此目录的存储为: - -1. 高写入性能(SSD) - -2. 安全(RAID) - -### `custom_config_dir` - -默认值:PaloFe.DORIS_HOME_DIR + "/conf" - -自定义配置文件目录 - -配置 `fe_custom.conf` 文件的位置。默认为 `conf/` 目录下。 - -在某些部署环境下,`conf/` 目录可能因为系统的版本升级被覆盖掉。这会导致用户在运行是持久化修改的配置项也被覆盖。这时,我们可以将 `fe_custom.conf` 存储在另一个指定的目录中,以防止配置文件被覆盖。 - -### `log_roll_size_mb` - -默认值:1024 (1G) - -一个系统日志和审计日志的最大大小 - -### `sys_log_dir` - -默认值:PaloFe.DORIS_HOME_DIR + "/log" - -sys_log_dir: - -这指定了 FE 日志目录。 FE 将产生 2 个日志文件: - -1. fe.log:FE进程的所有日志。 -2. fe.warn.log FE 进程的所有警告和错误日志。 - -### `sys_log_level` - -默认值:INFO - -日志级别,可选项:INFO, WARNING, ERROR, FATAL - -### `sys_log_roll_num` - -默认值:10 - -要保存在 `sys_log_roll_interval ` 内的最大 FE 日志文件。 默认为 10,表示一天最多有 10 个日志文件 - -### `sys_log_verbose_modules` - -默认值:{} - -详细模块。 VERBOSE 级别由 log4j DEBUG 级别实现。 - -例如: - sys_log_verbose_modules = org.apache.doris.catalog - 这只会打印包 org.apache.doris.catalog 及其所有子包中文件的调试日志。 - -### `sys_log_roll_interval` - -默认值:DAY - -可选项: - -- DAY: log 前缀是 yyyyMMdd -- HOUR: log 前缀是 yyyyMMddHH - -### `sys_log_delete_age` - -默认值:7d - -默认为 7 天,如果日志的最后修改时间为 7 天前,则将其删除。 - -支持格式: - -- 7d: 7 天 -- 10h: 10 小时 -- 60m: 60 分钟 -- 120s: 120 秒 - -### `audit_log_dir` - -默认值:PaloFe.DORIS_HOME_DIR + "/log" - -审计日志目录: -这指定了 FE 审计日志目录。 -审计日志 fe.audit.log 包含所有请求以及相关信息,如 `user, host, cost, status ` 等。 - -### `audit_log_roll_num` - -默认值:90 - -保留在 `audit_log_roll_interval ` 内的最大 FE 审计日志文件。 - -### `audit_log_modules` - -默认值:{"slow_query", "query", "load", "stream_load"} - -慢查询包含所有开销超过 *qe_slow_log_ms* 的查询 - -### `qe_slow_log_ms` - -默认值:5000 (5秒) - -如果查询的响应时间超过此阈值,则会在审计日志中记录为 slow_query。 - -### `audit_log_roll_interval` - -默认值:DAY - -DAY: log前缀是:yyyyMMdd -HOUR: log前缀是:yyyyMMddHH - -### `audit_log_delete_age` - -默认值:30d - -默认为 30 天,如果日志的最后修改时间为 30 天前,则将其删除。 -支持格式: -7d 7 天 -10 小时 10 小时 -60m 60 分钟 -120s 120 秒 - -### `plugin_dir` - -默认值:DORIS_HOME + "/plugins - -插件安装目录 - -### `plugin_enable` - -默认值:true - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -插件是否启用,默认启用 - -### `label_keep_max_second` - -默认值:3 * 24 * 3600 (3天) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -`label_keep_max_second `后将删除已完成或取消的加载作业的标签, - -1. 去除的标签可以重复使用。 -2. 
设置较短的时间会降低 FE 内存使用量 (因为所有加载作业的信息在被删除之前都保存在内存中) - -在高并发写的情况下,如果出现大量作业积压,出现 `call frontend service failed`的情况,查看日志如果是元数据写占用锁的时间太长,可以将这个值调成12小时,或者更小6小时 - -### `streaming_label_keep_max_second` - -默认值:43200 (12小时) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -对于一些高频负载工作,例如:INSERT、STREAMING LOAD、ROUTINE_LOAD_TASK 。 如果过期,则删除已完成的作业或任务。 - -### `history_job_keep_max_second` - -默认值:7 * 24 * 3600 (7天) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -某些作业的最大保留时间。 像 schema 更改和 Rollup 作业。 - -### `label_clean_interval_second` - -默认值:4 * 3600 (4小时) - -load 标签清理器将每隔 `label_clean_interval_second` 运行一次以清理过时的作业。 - -### `delete_info_keep_max_second` - -默认值:3 * 24 * 3600 (3天) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:false - -删除元数据中创建时间大于`delete_info_keep_max_second`的delete信息。 - -设置较短的时间将减少 FE 内存使用量和镜像文件大小。(因为所有的deleteInfo在被删除之前都存储在内存和镜像文件中) - -### `transaction_clean_interval_second` - -默认值:30 - -如果事务 visible 或者 aborted 状态,事务将在 `transaction_clean_interval_second` 秒后被清除 ,我们应该让这个间隔尽可能短,每个清洁周期都尽快 - - -### `default_max_query_instances` - -默认值:-1 - -用户属性max_query_instances小于等于0时,使用该配置,用来限制单个用户同一时刻可使用的查询instance个数。该参数小于等于0表示无限制。 - -### `use_compact_thrift_rpc` - -默认值:true - -是否使用压缩格式发送查询计划结构体。开启后,可以降低约50%的查询计划结构体大小,从而避免一些 "send fragment timeout" 错误。 -但是在某些高并发小查询场景下,可能会降低约10%的并发度。 - -### `disable_tablet_scheduler` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果设置为true,将关闭副本修复和均衡逻辑。 - - - -### `enable_force_drop_redundant_replica` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果设置为 true,系统会在副本调度逻辑中,立即删除冗余副本。这可能导致部分正在对对应副本写入的导入作业失败,但是会加速副本的均衡和修复速度。 -当集群中有大量等待被均衡或修复的副本时,可以尝试设置此参数,以牺牲部分导入成功率为代价,加速副本的均衡和修复。 - -### `repair_slow_replica` - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -如果设置为 true,会自动检测compaction比较慢的副本,并将迁移到其他机器,检测条件是 最慢副本的版本计数超过 `min_version_count_indicate_replica_compaction_too_slow` 的值, 且与最快副本的版本计数差异所占比例超过 `valid_version_count_delta_ratio_between_replicas` 的值 - -### `colocate_group_relocate_delay_second` - -默认值:1800 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -重分布一个 Colocation Group 可能涉及大量的tablet迁移。因此,我们需要一个更保守的策略来避免不必要的 Colocation 重分布。 -重分布通常发生在 Doris 检测到有 BE 节点宕机后。这个参数用于推迟对BE宕机的判断。如默认参数下,如果 BE 节点能够在 1800 秒内恢复,则不会触发 Colocation 重分布。 - -### `allow_replica_on_same_host` - -默认值:false - -是否可以动态配置:false - -是否为 Master FE 节点独有的配置项:false - -是否允许同一个 tablet 的多个副本分布在同一个 host 上。这个参数主要用于本地测试是,方便搭建多个 BE 已测试某些多副本情况。不要用于非测试环境。 - -### `min_version_count_indicate_replica_compaction_too_slow` - -默认值:300 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -版本计数阈值,用来判断副本做 compaction 的速度是否太慢 - -### `valid_version_count_delta_ratio_between_replicas` - -默认值:0.5 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -最慢副本的版本计数与最快副本的差异有效比率阈值,如果设置 `repair_slow_replica` 为 true,则用于判断是否修复最慢的副本 - -### `min_bytes_indicate_replica_too_large` - -默认值:2 * 1024 * 1024 * 1024 (2G) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -数据大小阈值,用来判断副本的数据量是否太大 - -### skip_compaction_slower_replica - -默认值:true - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:false - -如果设置为true,则在选择可查询副本时,将跳过 compaction 较慢的副本 - -### enable_create_sync_job - -开启 MySQL 数据同步作业功能。默认是 false,关闭此功能 - -默认值:false - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -### sync_commit_interval_second - -提交事务的最大时间间隔。若超过了这个时间 channel 中还有数据没有提交,consumer 会通知 channel 提交事务。 - -默认值:10(秒) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -### min_sync_commit_size - -提交事务需满足的最小 event 数量。若 Fe 接收到的 event 数量小于它,会继续等待下一批数据直到时间超过了 `sync_commit_interval_second ` 为止。默认值是 10000 个 events,如果你想修改此配置,请确保此值小于 canal 端的 `canal.instance.memory.buffer.size` 
配置(默认16384),否则在 ack 前Fe会尝试获取比 store 队列长度更多的 event,导致 store 队列阻塞至超时为止。 - -默认值:10000 - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -### min_bytes_sync_commit - -提交事务需满足的最小数据大小。若 Fe 接收到的数据大小小于它,会继续等待下一批数据直到时间超过了 `sync_commit_interval_second` 为止。默认值是 15 MB,如果你想修改此配置,请确保此值小于 canal 端的 `canal.instance.memory.buffer.size` 和 `canal.instance.memory.buffer.memunit` 的乘积(默认 16 MB),否则在 ack 前 Fe 会尝试获取比 store 空间更大的数据,导致 store 队列阻塞至超时为止。 - -默认值:15 * 1024 * 1024(15M) - -是否可以动态配置:true - -是否为 Master FE 节点独有的配置项:true - -### max_bytes_sync_commit - - 数据同步作业线程池中的最大线程数量。此线程池整个FE中只有一个,用于处理FE中所有数据同步作业向BE发送数据的任务 task,线程池的实现在 `SyncTaskPool` 类。 - -默认值:10 - -是否可以动态配置:false - -是否为 Master FE 节点独有的配置项:false diff --git a/docs/zh-CN/administrator-guide/config/user_property.md b/docs/zh-CN/administrator-guide/config/user_property.md deleted file mode 100644 index ff57234296..0000000000 --- a/docs/zh-CN/administrator-guide/config/user_property.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -{ - "title": "用户配置项", - "language": "zh-CN" -} ---- - - - -# User 配置项 - -该文档主要介绍了 User 级别的相关配置项。User 级别的配置生效范围为单个用户。每个用户都可以设置自己的 User property。相互不影响。 - -## 查看配置项 - -FE 启动后,在 MySQL 客户端,通过下面命令查看 User 的配置项: - -`SHOW PROPERTY [FOR user] [LIKE key pattern]` - -具体语法可通过命令:`help show property;` 查询。 - -## 设置配置项 - -FE 启动后,在MySQL 客户端,通过下面命令修改 User 的配置项: - -`SET PROPERTY [FOR 'user'] 'key' = 'value' [, 'key' = 'value']` - -具体语法可通过命令:`help set property;` 查询。 - -User 级别的配置项只会对指定用户生效,并不会影响其他用户的配置。 - -## 应用举例 - -1. 修改用户 Billie 的 `max_user_connections` - - 通过 `SHOW PROPERTY FOR 'Billie' LIKE '%max_user_connections%';` 查看 Billie 用户当前的最大链接数为 100。 - - 通过 `SET PROPERTY FOR 'Billie' 'max_user_connections' = '200';` 修改 Billie 用户的当前最大连接数到 200。 - -## 配置项列表 - -### max_user_connections - - 用户最大的连接数,默认值为100。一般情况不需要更改该参数,除非查询的并发数超过了默认值。 - -### max_query_instances - - 用户同一时间点可使用的instance个数, 默认是-1,小于等于0将会使用配置default_max_query_instances. - -### resource - -### quota - -### default_load_cluster - -### load_cluster diff --git a/docs/zh-CN/administrator-guide/dynamic-partition.md b/docs/zh-CN/administrator-guide/dynamic-partition.md deleted file mode 100644 index 0249b37ecb..0000000000 --- a/docs/zh-CN/administrator-guide/dynamic-partition.md +++ /dev/null @@ -1,460 +0,0 @@ ---- -{ - "title": "动态分区", - "language": "zh-CN" -} ---- - - - -# 动态分区 - -动态分区是在 Doris 0.12 版本中引入的新功能。旨在对表级别的分区实现生命周期管理(TTL),减少用户的使用负担。 - -目前实现了动态添加分区及动态删除分区的功能。 - -动态分区只支持 Range 分区。 - -## 名词解释 - -* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。 -* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。 - -## 原理 - -在某些使用场景下,用户会将表按照天进行分区划分,每天定时执行例行任务,这时需要使用方手动管理分区,否则可能由于使用方没有创建分区导致数据导入失败,这给使用方带来了额外的维护成本。 - -通过动态分区功能,用户可以在建表时设定动态分区的规则。FE 会启动一个后台线程,根据用户指定的规则创建或删除分区。用户也可以在运行时对现有规则进行变更。 - -## 使用方式 - -动态分区的规则可以在建表时指定,或者在运行时进行修改。当前仅支持对单分区列的分区表设定动态分区规则。 - -* 建表时指定: - - ``` - CREATE TABLE tbl1 - (...) - PROPERTIES - ( - "dynamic_partition.prop1" = "value1", - "dynamic_partition.prop2" = "value2", - ... - ) - ``` - -* 运行时修改 - - ``` - ALTER TABLE tbl1 SET - ( - "dynamic_partition.prop1" = "value1", - "dynamic_partition.prop2" = "value2", - ... 
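    -- 示意:上面的 prop1 / prop2 可替换为具体的动态分区属性,例如:
    -- "dynamic_partition.enable" = "true",
    -- "dynamic_partition.end" = "3"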
- ) - ``` - -### 动态分区规则参数 - -动态分区的规则参数都以 `dynamic_partition.` 为前缀: - -* `dynamic_partition.enable` - - 是否开启动态分区特性。可指定为 `TRUE` 或 `FALSE`。如果不填写,默认为 `TRUE`。如果为 `FALSE`,则 Doris 会忽略该表的动态分区规则。 - -* `dynamic_partition.time_unit` - - 动态分区调度的单位。可指定为 `HOUR`、`DAY`、`WEEK`、`MONTH`。分别表示按天、按星期、按月进行分区创建或删除。 - - 当指定为 `HOUR` 时,动态创建的分区名后缀格式为 `yyyyMMddHH`,例如`2020032501`。小时为单位的分区列数据类型不能为 DATE。 - - 当指定为 `DAY` 时,动态创建的分区名后缀格式为 `yyyyMMdd`,例如`20200325`。 - - 当指定为 `WEEK` 时,动态创建的分区名后缀格式为`yyyy_ww`。即当前日期属于这一年的第几周,例如 `2020-03-25` 创建的分区名后缀为 `2020_13`, 表明目前为2020年第13周。 - - 当指定为 `MONTH` 时,动态创建的分区名后缀格式为 `yyyyMM`,例如 `202003`。 - -* `dynamic_partition.time_zone` - - 动态分区的时区,如果不填写,则默认为当前机器的系统的时区,例如 `Asia/Shanghai`,如果想获取当前支持的时区设置,可以参考 `https://en.wikipedia.org/wiki/List_of_tz_database_time_zones`。 - -* `dynamic_partition.start` - - 动态分区的起始偏移,为负数。根据 `time_unit` 属性的不同,以当天(星期/月)为基准,分区范围在此偏移之前的分区将会被删除。如果不填写,则默认为 `-2147483648`,即不删除历史分区。 - -* `dynamic_partition.end` - - 动态分区的结束偏移,为正数。根据 `time_unit` 属性的不同,以当天(星期/月)为基准,提前创建对应范围的分区。 - -* `dynamic_partition.prefix` - - 动态创建的分区名前缀。 - -* `dynamic_partition.buckets` - - 动态创建的分区所对应的分桶数量。 - -* `dynamic_partition.replication_num` - - 动态创建的分区所对应的副本数量,如果不填写,则默认为该表创建时指定的副本数量。 - -* `dynamic_partition.start_day_of_week` - - 当 `time_unit` 为 `WEEK` 时,该参数用于指定每周的起始点。取值为 1 到 7。其中 1 表示周一,7 表示周日。默认为 1,即表示每周以周一为起始点。 - -* `dynamic_partition.start_day_of_month` - - 当 `time_unit` 为 `MONTH` 时,该参数用于指定每月的起始日期。取值为 1 到 28。其中 1 表示每月1号,28 表示每月28号。默认为 1,即表示每月以1号位起始点。暂不支持以29、30、31号为起始日,以避免因闰年或闰月带来的歧义。 - -* `dynamic_partition.create_history_partition` - - 默认为 false。当置为 true 时,Doris 会自动创建所有分区,具体创建规则见下文。同时,FE 的参数 `max_dynamic_partition_num` 会限制总分区数量,以避免一次性创建过多分区。当期望创建的分区个数大于 `max_dynamic_partition_num` 值时,操作将被禁止。 - - 当不指定 `start` 属性时,该参数不生效。 - -* `dynamic_partition.history_partition_num` - - 当 `create_history_partition` 为 `true` 时,该参数用于指定创建历史分区数量。默认值为 -1, 即未设置。 - -* `dynamic_partition.hot_partition_num` - - 指定最新的多少个分区为热分区。对于热分区,系统会自动设置其 `storage_medium` 参数为SSD,并且设置 `storage_cooldown_time`。 - - `hot_partition_num` 是往前 n 天和未来所有分区 - - 我们举例说明。假设今天是 2021-05-20,按天分区,动态分区的属性设置为:hot_partition_num=2, end=3, start=-3。则系统会自动创建以下分区,并且设置 `storage_medium` 和 `storage_cooldown_time` 参数: - - ``` - p20210517:["2021-05-17", "2021-05-18") storage_medium=HDD storage_cooldown_time=9999-12-31 23:59:59 - p20210518:["2021-05-18", "2021-05-19") storage_medium=HDD storage_cooldown_time=9999-12-31 23:59:59 - p20210519:["2021-05-19", "2021-05-20") storage_medium=SSD storage_cooldown_time=2021-05-21 00:00:00 - p20210520:["2021-05-20", "2021-05-21") storage_medium=SSD storage_cooldown_time=2021-05-22 00:00:00 - p20210521:["2021-05-21", "2021-05-22") storage_medium=SSD storage_cooldown_time=2021-05-23 00:00:00 - p20210522:["2021-05-22", "2021-05-23") storage_medium=SSD storage_cooldown_time=2021-05-24 00:00:00 - p20210523:["2021-05-23", "2021-05-24") storage_medium=SSD storage_cooldown_time=2021-05-25 00:00:00 - ``` - -* `dynamic_partition.reserved_history_periods` - - 需要保留的历史分区的时间范围。当`dynamic_partition.time_unit` 设置为 "DAY/WEEK/MONTH" 时,需要以 `[yyyy-MM-dd,yyyy-MM-dd],[...,...]` 格式进行设置。当`dynamic_partition.time_unit` 设置为 "HOUR" 时,需要以 `[yyyy-MM-dd HH:mm:ss,yyyy-MM-dd HH:mm:ss],[...,...]` 的格式来进行设置。如果不设置,默认为 `"NULL"`。 - - 我们举例说明。假设今天是 2021-09-06,按天分类,动态分区的属性设置为: - - ```time_unit="DAY/WEEK/MONTH", end=3, start=-3, reserved_history_periods="[2020-06-01,2020-06-20],[2020-10-31,2020-11-15]"```。 - - 则系统会自动保留: - - ``` - ["2020-06-01","2020-06-20"], - ["2020-10-31","2020-11-15"] - ``` - - 或者 - - ```time_unit="HOUR", end=3, start=-3, 
reserved_history_periods="[2020-06-01 00:00:00,2020-06-01 03:00:00]"```. - - 则系统会自动保留: - - ``` - ["2020-06-01 00:00:00","2020-06-01 03:00:00"] - ``` - - 这两个时间段的分区。其中,`reserved_history_periods` 的每一个 `[...,...]` 是一对设置项,两者需要同时被设置,且第一个时间不能大于第二个时间``。 - -#### 创建历史分区规则 - -当 `create_history_partition` 为 `true`,即开启创建历史分区功能时,Doris 会根据 `dynamic_partition.start` 和 `dynamic_partition.history_partition_num` 来决定创建历史分区的个数。 - -假设需要创建的历史分区数量为 `expect_create_partition_num`,根据不同的设置具体数量如下: - -1. `create_history_partition` = `true` - - `dynamic_partition.history_partition_num` 未设置,即 -1. - `expect_create_partition_num` = `end` - `start`; - - - `dynamic_partition.history_partition_num` 已设置 - `expect_create_partition_num` = `end` - max(`start`, `-histoty_partition_num`); - -2. `create_history_partition` = `false` - 不会创建历史分区,`expect_create_partition_num` = `end` - 0; - -当 `expect_create_partition_num` 大于 `max_dynamic_partition_num`(默认500)时,禁止创建过多分区。 - -**举例说明:** - -1. 假设今天是 2021-05-20,按天分区,动态分区的属性设置为:`create_history_partition=true, end=3, start=-3, history_partition_num=1`,则系统会自动创建以下分区: - - ``` - p20210519 - p20210520 - p20210521 - p20210522 - p20210523 - ``` - -2. `history_partition_num=5`,其余属性与 1 中保持一直,则系统会自动创建以下分区: - - ``` - p20210517 - p20210518 - p20210519 - p20210520 - p20210521 - p20210522 - p20210523 - ``` - -3. `history_partition_num=-1` 即不设置历史分区数量,其余属性与 1 中保持一直,则系统会自动创建以下分区: - - ``` - p20210517 - p20210518 - p20210519 - p20210520 - p20210521 - p20210522 - p20210523 - ``` - -### 注意事项 - -动态分区使用过程中,如果因为一些意外情况导致 `dynamic_partition.start` 和 `dynamic_partition.end` 之间的某些分区丢失,那么当前时间与 `dynamic_partition.end` 之间的丢失分区会被重新创建,`dynamic_partition.start`与当前时间之间的丢失分区不会重新创建。 - -## 示例 - -1. 表 tbl1 分区列 k1 类型为 DATE,创建一个动态分区规则。按天分区,只保留最近7天的分区,并且预先创建未来3天的分区。 - - ``` - CREATE TABLE tbl1 - ( - k1 DATE, - ... - ) - PARTITION BY RANGE(k1) () - DISTRIBUTED BY HASH(k1) - PROPERTIES - ( - "dynamic_partition.enable" = "true", - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-7", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32" - ); - ``` - - 假设当前日期为 2020-05-29。则根据以上规则,tbl1 会产生以下分区: - - ``` - p20200529: ["2020-05-29", "2020-05-30") - p20200530: ["2020-05-30", "2020-05-31") - p20200531: ["2020-05-31", "2020-06-01") - p20200601: ["2020-06-01", "2020-06-02") - ``` - - 在第二天,即 2020-05-30,会创建新的分区 `p20200602: ["2020-06-02", "2020-06-03")` - - 在 2020-06-06 时,因为 `dynamic_partition.start` 设置为 7,则将删除7天前的分区,即删除分区 `p20200529`。 - -2. 表 tbl1 分区列 k1 类型为 DATETIME,创建一个动态分区规则。按星期分区,只保留最近2个星期的分区,并且预先创建未来2个星期的分区。 - - ``` - CREATE TABLE tbl1 - ( - k1 DATETIME, - ... 
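    -- 注(仅为示意):本例分区列 k1 为 DATETIME 类型,按 WEEK 调度时,
    -- 动态创建的分区名后缀为 yyyy_ww,分区值会自动补全时分秒部分(见下文说明)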
- ) - PARTITION BY RANGE(k1) () - DISTRIBUTED BY HASH(k1) - PROPERTIES - ( - "dynamic_partition.enable" = "true", - "dynamic_partition.time_unit" = "WEEK", - "dynamic_partition.start" = "-2", - "dynamic_partition.end" = "2", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "8" - ); - ``` - - 假设当前日期为 2020-05-29,是 2020 年的第 22 周。默认每周起始为星期一。则根于以上规则,tbl1 会产生以下分区: - - ``` - p2020_22: ["2020-05-25 00:00:00", "2020-06-01 00:00:00") - p2020_23: ["2020-06-01 00:00:00", "2020-06-08 00:00:00") - p2020_24: ["2020-06-08 00:00:00", "2020-06-15 00:00:00") - ``` - - 其中每个分区的起始日期为当周的周一。同时,因为分区列 k1 的类型为 DATETIME,则分区值会补全时分秒部分,且皆为 0。 - - 在 2020-06-15,即第25周时,会删除2周前的分区,即删除 `p2020_22`。 - - 在上面的例子中,假设用户指定了周起始日为 `"dynamic_partition.start_day_of_week" = "3"`,即以每周三为起始日。则分区如下: - - ``` - p2020_22: ["2020-05-27 00:00:00", "2020-06-03 00:00:00") - p2020_23: ["2020-06-03 00:00:00", "2020-06-10 00:00:00") - p2020_24: ["2020-06-10 00:00:00", "2020-06-17 00:00:00") - ``` - - 即分区范围为当周的周三到下周的周二。 - - * 注:2019-12-31 和 2020-01-01 在同一周内,如果分区的起始日期为 2019-12-31,则分区名为 `p2019_53`,如果分区的起始日期为 2020-01-01,则分区名为 `p2020_01`。 - -3. 表 tbl1 分区列 k1 类型为 DATE,创建一个动态分区规则。按月分区,不删除历史分区,并且预先创建未来2个月的分区。同时设定以每月3号为起始日。 - - ``` - CREATE TABLE tbl1 - ( - k1 DATE, - ... - ) - PARTITION BY RANGE(k1) () - DISTRIBUTED BY HASH(k1) - PROPERTIES - ( - "dynamic_partition.enable" = "true", - "dynamic_partition.time_unit" = "MONTH", - "dynamic_partition.end" = "2", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "8", - "dynamic_partition.start_day_of_month" = "3" - ); - ``` - - 假设当前日期为 2020-05-29。则根于以上规则,tbl1 会产生以下分区: - - ``` - p202005: ["2020-05-03", "2020-06-03") - p202006: ["2020-06-03", "2020-07-03") - p202007: ["2020-07-03", "2020-08-03") - ``` - - 因为没有设置 `dynamic_partition.start`,则不会删除历史分区。 - - 假设今天为 2020-05-20,并设置以每月28号为起始日,则分区范围为: - - ``` - p202004: ["2020-04-28", "2020-05-28") - p202005: ["2020-05-28", "2020-06-28") - p202006: ["2020-06-28", "2020-07-28") - ``` - -## 修改动态分区属性 - -通过如下命令可以修改动态分区的属性: - -``` -ALTER TABLE tbl1 SET -( - "dynamic_partition.prop1" = "value1", - ... 
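    -- 仅为示意:例如将调度粒度由 DAY 改为 MONTH:
    -- "dynamic_partition.time_unit" = "MONTH"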
-); -``` - -某些属性的修改可能会产生冲突。假设之前分区粒度为 DAY,并且已经创建了如下分区: - -``` -p20200519: ["2020-05-19", "2020-05-20") -p20200520: ["2020-05-20", "2020-05-21") -p20200521: ["2020-05-21", "2020-05-22") -``` - -如果此时将分区粒度改为 MONTH,则系统会尝试创建范围为 `["2020-05-01", "2020-06-01")` 的分区,而该分区的分区范围和已有分区冲突,所以无法创建。而范围为 `["2020-06-01", "2020-07-01")` 的分区可以正常创建。因此,2020-05-22 到 2020-05-30 时间段的分区,需要自行填补。 - -### 查看动态分区表调度情况 - -通过以下命令可以进一步查看当前数据库下,所有动态分区表的调度情况: - -``` -mysql> SHOW DYNAMIC PARTITION TABLES; -+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+ -| TableName | Enable | TimeUnit | Start | End | Prefix | Buckets | StartOf | LastUpdateTime | LastSchedulerTime | State | LastCreatePartitionMsg | LastDropPartitionMsg | ReservedHistoryPeriods | -+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+ -| d3 | true | WEEK | -3 | 3 | p | 1 | MONDAY | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | [2021-12-01,2021-12-31] | -| d5 | true | DAY | -7 | 3 | p | 32 | N/A | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -| d4 | true | WEEK | -3 | 3 | p | 1 | WEDNESDAY | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -| d6 | true | MONTH | -2147483648 | 2 | p | 8 | 3rd | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -| d2 | true | DAY | -3 | 3 | p | 32 | N/A | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -| d7 | true | MONTH | -2147483648 | 5 | p | 8 | 24th | N/A | 2020-05-25 14:29:24 | NORMAL | N/A | N/A | NULL | -+-----------+--------+----------+-------------+------+--------+---------+-----------+----------------+---------------------+--------+------------------------+----------------------+-------------------------+ -7 rows in set (0.02 sec) -``` - -* LastUpdateTime: 最后一次修改动态分区属性的时间 -* LastSchedulerTime: 最后一次执行动态分区调度的时间 -* State: 最后一次执行动态分区调度的状态 -* LastCreatePartitionMsg: 最后一次执行动态添加分区调度的错误信息 -* LastDropPartitionMsg: 最后一次执行动态删除分区调度的错误信息 - -## 高级操作 - -### FE 配置项 - -* dynamic\_partition\_enable - - 是否开启 Doris 的动态分区功能。默认为 false,即关闭。该参数只影响动态分区表的分区操作,不影响普通表。可以通过修改 fe.conf 中的参数并重启 FE 生效。也可以在运行时执行以下命令生效: - - MySQL 协议: - - `ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true")` - - HTTP 协议: - - `curl --location-trusted -u username:password -XGET http://fe_host:fe_http_port/api/_set_config?dynamic_partition_enable=true` - - 若要全局关闭动态分区,则设置此参数为 false 即可。 - -* dynamic\_partition\_check\_interval\_seconds - - 动态分区线程的执行频率,默认为600(10分钟),即每10分钟进行一次调度。可以通过修改 fe.conf 中的参数并重启 FE 生效。也可以在运行时执行以下命令修改: - - MySQL 协议: - - `ADMIN SET FRONTEND CONFIG ("dynamic_partition_check_interval_seconds" = "7200")` - - HTTP 协议: - - `curl --location-trusted -u username:password -XGET http://fe_host:fe_http_port/api/_set_config?dynamic_partition_check_interval_seconds=432000` - -### 动态分区表与手动分区表相互转换 - -对于一个表来说,动态分区和手动分区可以自由转换,但二者不能同时存在,有且只有一种状态。 - -#### 手动分区转换为动态分区 - -如果一个表在创建时未指定动态分区,可以通过 `ALTER TABLE` 在运行时修改动态分区相关属性来转化为动态分区,具体示例可以通过 `HELP ALTER TABLE` 查看。 - -开启动态分区功能后,Doris 将不再允许用户手动管理分区,会根据动态分区属性来自动管理分区。 - -**注意**:如果已设定 `dynamic_partition.start`,分区范围在动态分区起始偏移之前的历史分区将会被删除。 - -#### 动态分区转换为手动分区 - -通过执行 `ALTER TABLE tbl_name SET ("dynamic_partition.enable" = "false")` 即可关闭动态分区功能,将其转换为手动分区表。 - -关闭动态分区功能后,Doris 将不再自动管理分区,需要用户手动通过 `ALTER TABLE` 的方式创建或删除分区。 - -## 常见问题 - -1. 
创建动态分区表后提示 ```Could not create table with dynamic partition when fe config dynamic_partition_enable is false``` - - 由于动态分区的总开关,也就是 FE 的配置 ```dynamic_partition_enable``` 为 false,导致无法创建动态分区表。 - - 这时候请修改 FE 的配置文件,增加一行 ```dynamic_partition_enable=true```,并重启 FE。或者执行命令 ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true") 将动态分区开关打开即可。 diff --git a/docs/zh-CN/administrator-guide/export-manual.md b/docs/zh-CN/administrator-guide/export-manual.md deleted file mode 100644 index b71f9bfce7..0000000000 --- a/docs/zh-CN/administrator-guide/export-manual.md +++ /dev/null @@ -1,202 +0,0 @@ ---- -{ - "title": "数据导出", - "language": "zh-CN" -} ---- - - - -# 数据导出 - -数据导出(Export)是 Doris 提供的一种将数据导出的功能。该功能可以将用户指定的表或分区的数据,以文本的格式,通过 Broker 进程导出到远端存储上,如 HDFS/BOS 等。 - -本文档主要介绍 Export 的基本原理、使用方式、最佳实践以及注意事项。 - -## 名词解释 - -* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。 -* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。 -* Broker:Doris 可以通过 Broker 进程对远端存储进行文件操作。 -* Tablet:数据分片。一个表会划分成多个数据分片。 - -## 原理 - -用户提交一个 Export 作业后。Doris 会统计这个作业涉及的所有 Tablet。然后对这些 Tablet 进行分组,每组生成一个特殊的查询计划。该查询计划会读取所包含的 Tablet 上的数据,然后通过 Broker 将数据写到远端存储指定的路径中,也可以通过S3协议直接导出到支持S3协议的远端存储上。 - -总体的调度方式如下: - -``` -+--------+ -| Client | -+---+----+ - | 1. Submit Job - | -+---v--------------------+ -| FE | -| | -| +-------------------+ | -| | ExportPendingTask | | -| +-------------------+ | -| | 2. Generate Tasks -| +--------------------+ | -| | ExportExporingTask | | -| +--------------------+ | -| | -| +-----------+ | +----+ +------+ +---------+ -| | QueryPlan +----------------> BE +--->Broker+---> | -| +-----------+ | +----+ +------+ | Remote | -| +-----------+ | +----+ +------+ | Storage | -| | QueryPlan +----------------> BE +--->Broker+---> | -| +-----------+ | +----+ +------+ +---------+ -+------------------------+ 3. Execute Tasks - -``` - -1. 用户提交一个 Export 作业到 FE。 -2. FE 的 Export 调度器会通过两阶段来执行一个 Export 作业: - 1. PENDING:FE 生成 ExportPendingTask,向 BE 发送 snapshot 命令,对所有涉及到的 Tablet 做一个快照。并生成多个查询计划。 - 2. 
EXPORTING:FE 生成 ExportExportingTask,开始执行查询计划。 - -### 查询计划拆分 - -Export 作业会生成多个查询计划,每个查询计划负责扫描一部分 Tablet。每个查询计划扫描的 Tablet 个数由 FE 配置参数 `export_tablet_num_per_task` 指定,默认为 5。即假设一共 100 个 Tablet,则会生成 20 个查询计划。用户也可以在提交作业时,通过作业属性 `tablet_num_per_task` 指定这个数值。 - -一个作业的多个查询计划顺序执行。 - -### 查询计划执行 - -一个查询计划扫描多个分片,将读取的数据以行的形式组织,每 1024 行为一个 batch,调用 Broker 写入到远端存储上。 - -查询计划遇到错误会整体自动重试 3 次。如果一个查询计划重试 3 次依然失败,则整个作业失败。 - -Doris 会首先在指定的远端存储的路径中,建立一个名为 `__doris_export_tmp_12345` 的临时目录(其中 `12345` 为作业 id)。导出的数据首先会写入这个临时目录。每个查询计划会生成一个文件,文件名示例: - -`export-data-c69fcf2b6db5420f-a96b94c1ff8bccef-1561453713822` - -其中 `c69fcf2b6db5420f-a96b94c1ff8bccef` 为查询计划的 query id。`1561453713822` 为文件生成的时间戳。 - -当所有数据都导出后,Doris 会将这些文件 rename 到用户指定的路径中。 - -### Broker 参数 - -Export 需要借助 Broker 进程访问远端存储,不同的 Broker 需要提供不同的参数,具体请参阅 [Broker文档](./broker.md) - -## 使用示例 - -Export 的详细命令可以通过 `HELP EXPORT;` 。举例如下: - -``` -EXPORT TABLE db1.tbl1 -PARTITION (p1,p2) -[WHERE [expr]] -TO "hdfs://host/path/to/export/" -PROPERTIES -( - "label" = "mylabel", - "column_separator"=",", - "columns" = "col1,col2", - "exec_mem_limit"="2147483648", - "timeout" = "3600" -) -WITH BROKER "hdfs" -( - "username" = "user", - "password" = "passwd" -); -``` - -* `label`:本次导出作业的标识。后续可以使用这个标识查看作业状态。 -* `column_separator`:列分隔符。默认为 `\t`。支持不可见字符,比如 '\x07'。 -* `columns`:要导出的列,使用英文状态逗号隔开,如果不填这个参数默认是导出表的所有列。 -* `line_delimiter`:行分隔符。默认为 `\n`。支持不可见字符,比如 '\x07'。 -* `exec_mem_limit`: 表示 Export 作业中,一个查询计划在单个 BE 上的内存使用限制。默认 2GB。单位字节。 -* `timeout`:作业超时时间。默认 2小时。单位秒。 -* `tablet_num_per_task`:每个查询计划分配的最大分片数。默认为 5。 - -提交作业后,可以通过 `SHOW EXPORT` 命令查询导入作业状态。结果举例如下: - -``` - JobId: 14008 - Label: mylabel - State: FINISHED - Progress: 100% - TaskInfo: {"partitions":["*"],"exec mem limit":2147483648,"column separator":",","line delimiter":"\n","tablet num":1,"broker":"hdfs","coord num":1,"db":"default_cluster:db1","tbl":"tbl3"} - Path: bos://bj-test-cmy/export/ -CreateTime: 2019-06-25 17:08:24 - StartTime: 2019-06-25 17:08:28 -FinishTime: 2019-06-25 17:08:34 - Timeout: 3600 - ErrorMsg: N/A -``` - -* JobId:作业的唯一 ID -* Label:自定义作业标识 -* State:作业状态: - * PENDING:作业待调度 - * EXPORTING:数据导出中 - * FINISHED:作业成功 - * CANCELLED:作业失败 -* Progress:作业进度。该进度以查询计划为单位。假设一共 10 个查询计划,当前已完成 3 个,则进度为 30%。 -* TaskInfo:以 Json 格式展示的作业信息: - * db:数据库名 - * tbl:表名 - * partitions:指定导出的分区。`*` 表示所有分区。 - * exec mem limit:查询计划内存使用限制。单位字节。 - * column separator:导出文件的列分隔符。 - * line delimiter:导出文件的行分隔符。 - * tablet num:涉及的总 Tablet 数量。 - * broker:使用的 broker 的名称。 - * coord num:查询计划的个数。 -* Path:远端存储上的导出路径。 -* CreateTime/StartTime/FinishTime:作业的创建时间、开始调度时间和结束时间。 -* Timeout:作业超时时间。单位是秒。该时间从 CreateTime 开始计算。 -* ErrorMsg:如果作业出现错误,这里会显示错误原因。 - -## 最佳实践 - -### 查询计划的拆分 - -一个 Export 作业有多少查询计划需要执行,取决于总共有多少 Tablet,以及一个查询计划最多可以分配多少个 Tablet。因为多个查询计划是串行执行的,所以如果让一个查询计划处理更多的分片,则可以减少作业的执行时间。但如果查询计划出错(比如调用 Broker 的 RPC 失败,远端存储出现抖动等),过多的 Tablet 会导致一个查询计划的重试成本变高。所以需要合理安排查询计划的个数以及每个查询计划所需要扫描的分片数,在执行时间和执行成功率之间做出平衡。一般建议一个查询计划扫描的数据量在 3-5 GB内(一个表的 Tablet 的大小以及个数可以通过 `SHOW TABLET FROM tbl_name;` 语句查看。)。 - -### exec\_mem\_limit - -通常一个 Export 作业的查询计划只有 `扫描`-`导出` 两部分,不涉及需要太多内存的计算逻辑。所以通常 2GB 的默认内存限制可以满足需求。但在某些场景下,比如一个查询计划,在同一个 BE 上需要扫描的 Tablet 过多,或者 Tablet 的数据版本过多时,可能会导致内存不足。此时需要通过这个参数设置更大的内存,比如 4GB、8GB 等。 - -## 注意事项 - -* 不建议一次性导出大量数据。一个 Export 作业建议的导出数据量最大在几十 GB。过大的导出会导致更多的垃圾文件和更高的重试成本。 -* 如果表数据量过大,建议按照分区导出。 -* 在 Export 作业运行过程中,如果 FE 发生重启或切主,则 Export 作业会失败,需要用户重新提交。 -* 如果 Export 作业运行失败,在远端存储中产生的 `__doris_export_tmp_xxx` 临时目录,以及已经生成的文件不会被删除,需要用户手动删除。 -* 如果 Export 作业运行成功,在远端存储中产生的 `__doris_export_tmp_xxx` 目录,根据远端存储的文件系统语义,可能会保留,也可能会被清除。比如在百度对象存储(BOS)中,通过 rename 
操作将一个目录中的最后一个文件移走后,该目录也会被删除。如果该目录没有被清除,用户可以手动清除。 -* 当 Export 运行完成后(成功或失败),FE 发生重启或切主,则 `SHOW EXPORT` 展示的作业的部分信息会丢失,无法查看。 -* Export 作业只会导出 Base 表的数据,不会导出 Rollup Index 的数据。 -* Export 作业会扫描数据,占用 IO 资源,可能会影响系统的查询延迟。 - -## 相关配置 - -### FE - -* `export_checker_interval_second`:Export 作业调度器的调度间隔,默认为 5 秒。设置该参数需重启 FE。 -* `export_running_job_num_limit`:正在运行的 Export 作业数量限制。如果超过,则作业将等待并处于 PENDING 状态。默认为 5,可以运行时调整。 -* `export_task_default_timeout_second`:Export 作业默认超时时间。默认为 2 小时。可以运行时调整。 -* `export_tablet_num_per_task`:一个查询计划负责的最大分片数。默认为 5。 - diff --git a/docs/zh-CN/administrator-guide/export_with_mysql_dump.md b/docs/zh-CN/administrator-guide/export_with_mysql_dump.md deleted file mode 100644 index 1bd664fabf..0000000000 --- a/docs/zh-CN/administrator-guide/export_with_mysql_dump.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -{ -"title": "使用mysqldump数据导出表结构或者数据", -"language": "zh-CN" -} ---- - - - -# 使用mysqldump数据导出表结构或者数据 -Doris 在0.15 之后的版本已经支持通过`mysqldump` 工具导出数据或者表结构 - -## 使用示例 -### 导出 - 1. 导出 test 数据库中的 table1 表:`mysqldump -h127.0.0.1 -P9030 -uroot --no-tablespaces --databases test --tables table1` - 2. 导出 test 数据库中的 table1 表结构:`mysqldump -h127.0.0.1 -P9030 -uroot --no-tablespaces --databases test --tables table1 --no-data` - 3. 导出 test1, test2 数据库中所有表:`mysqldump -h127.0.0.1 -P9030 -uroot --no-tablespaces --databases test1 test2` - 4. 导出所有数据库和表 `mysqldump -h127.0.0.1 -P9030 -uroot --no-tablespaces --all-databases` -更多的使用参数可以参考`mysqldump` 的使用手册 -### 导入 - `mysqldump` 导出的结果可以重定向到文件中,之后可以通过 source 命令导入到Doris 中 `source filename.sql` -## 注意 - 1. 由于Doris 中没有mysql 里的 tablespace 概念,因此在使用mysqldump 时要加上 `--no-tablespaces` 参数 - 2. 使用mysqldump 导出数据和表结构仅用于开发测试或者数据量很小的情况,请勿用于大数据量的生产环境 diff --git a/docs/zh-CN/administrator-guide/ldap.md b/docs/zh-CN/administrator-guide/ldap.md deleted file mode 100644 index 5511fa15c0..0000000000 --- a/docs/zh-CN/administrator-guide/ldap.md +++ /dev/null @@ -1,177 +0,0 @@ ---- -{ - "title": "LDAP", - "language": "zh-CN" -} ---- - - - -# LDAP - -接入第三方LDAP服务为Doris提供验证登录和组授权服务。 - -LDAP验证登录指的是接入LDAP服务的密码验证来补充Doris的验证登录。Doris优先使用LDAP验证用户密码,如果LDAP服务中不存在该用户则继续使用Doris验证密码,如果LDAP密码正确但是Doris中没有对应账户则创建临时用户登录Doris。 - -LDAP组授权是将LDAP中的group映射到Doris中的Role,如果用户在LDAP中属于多个用户组,登录Doris后用户将获得所有组对应Role的权限,要求组名与Role名字相同。 - -## 名词解释 - -* LDAP: 轻量级目录访问协议,能够实现账号密码的集中管理。 -* 权限 Privilege:权限作用的对象是节点、数据库或表。不同的权限代表不同的操作许可。 -* 角色 Role:Doris可以创建自定义命名的角色。角色可以被看做是一组权限的集合。 - -## 启用LDAP认证 -### server端配置 - -需要在fe/conf/ldap.conf文件中配置LDAP基本信息,另有LDAP管理员密码需要使用sql语句进行设置。 - -#### 配置fe/conf/ldap.conf文件: -* ldap_authentication_enabled = false - 设置值为“true”启用LDAP验证;当值为“false”时,不启用LDAP验证,该配置文件的其他配置项都无效。 - -* ldap_host = 127.0.0.1 - LDAP服务ip。 - -* ldap_port = 389 - LDAP服务端口,默认明文传输端口为389,目前Doris的LDAP功能仅支持明文密码传输。 - -* ldap_admin_name = cn=admin,dc=domain,dc=com - LDAP管理员账户“Distinguished Name”。当用户使用LDAP验证登录Doris时,Doris会绑定该管理员账户在LDAP中搜索用户信息。 - -* ldap_user_basedn = ou=people,dc=domain,dc=com - Doris在LDAP中搜索用户信息时的base dn。 - -* ldap_user_filter = (&(uid={login})) -* - Doris在LDAP中搜索用户信息时的过滤条件,占位符“{login}”会被替换为登录用户名。必须保证通过该过滤条件搜索的用户唯一,否则Doris无法通过LDAP验证密码,登录时会出现“ERROR 5081 (42000): user is not unique in LDAP server.”的错误信息。 - - 例如使用LDAP用户节点uid属性作为登录Doris的用户名可以配置该项为: - ldap_user_filter = (&(uid={login})); - 使用LDAP用户邮箱前缀作为用户名可配置该项: - ldap_user_filter = (&(mail={login}@baidu.com))。 - -* ldap_group_basedn = ou=group,dc=domain,dc=com - Doris在LDAP中搜索组信息时的base dn。如果不配置该项,将不启用LDAP组授权。 - -#### 设置LDAP管理员密码: -配置好ldap.conf文件后启动fe,使用root或admin账号登录Doris,执行sql: -``` -set ldap_admin_password = 'ldap_admin_password'; -``` - -### Client端配置 
-客户端使用LDAP验证需要启用mysql客户端明文验证插件,使用命令行登录Doris可以使用下面两种方式之一启用mysql明文验证插件: - -* 设置环境变量LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN值1。 - - 例如在linux或者max环境中可以使用: - ``` - echo "export LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN=1" >> ~/.bash_profile && source ~/.bash_profile - ``` - -* 每次登录Doris时添加参数“--enable-cleartext-plugin”: - ``` - mysql -hDORIS_HOST -PDORIS_PORT -u user -p --enable-cleartext-plugin - - 输入ldap密码 - ``` - -## LDAP认证详解 -LDAP密码验证和组授权是Doris密码验证和授权的补充,开启LDAP功能并不能完全替代Doris的密码验证和授权,而是与Doris密码验证和授权并存。 - -### LDAP验证登录详解 -开启LDAP后,用户在Doris和DLAP中存在以下几种情况: - -|LDAP用户|Doris用户|密码|登录情况|登录Doris的用户| -|--|--|--|--|--| -|存在|存在|LDAP密码|登录成功|Doris用户| -|存在|存在|Doris密码|登录失败|无| -|不存在|存在|Doris密码|登录成功|Doris用户| -|存在|不存在|LDAP密码|登录成功|Ldap临时用户| - -开启LDAP后,用户使用mysql client登录时,Doris会先通过LDAP服务验证用户密码,如果LDAP存在用户且密码正确,Doris则使用该用户登录;此时Doris若存在对应账户则直接登录该账户,如果不存在对应账户则为用户创建临时账户并登录该账户。临时账户具有具有相应对权限(参见LDAP组授权),仅对当前连接有效,doris不会创建该用户,也不会产生创建用户对元数据。 -如果LDAP服务中不存在登录用户,则使用Doris进行密码认证。 - -以下假设已开启LDAP认证,配置ldap_user_filter = (&(uid={login})),且其他配置项都正确,客户端设置环境变量LIBMYSQL_ENABLE_CLEARTEXT_PLUGIN=1 - -例如: - -#### 1:Doris和LDAP中都存在账户: - -存在Doris账户:jack@'172.10.1.10',密码:123456 -LDAP用户节点存在属性:uid: jack 用户密码:abcdef -使用以下命令登录Doris可以登录jack@'172.10.1.10'账户: -``` -mysql -hDoris_HOST -PDoris_PORT -ujack -p abcdef -``` - -使用以下命令将登录失败: -``` -mysql -hDoris_HOST -PDoris_PORT -ujack -p 123456 -``` - -#### 2:LDAP中存在用户,Doris中不存在对应账户: - -LDAP用户节点存在属性:uid: jack 用户密码:abcdef -使用以下命令创建临时用户并登录jack@'%',临时用户具有基本权限 DatabasePrivs:Select_priv, 用户退出登录后Doris将删除该临时用户: -``` -mysql -hDoris_HOST -PDoris_PORT -ujack -p abcdef -``` - -#### 3:LDAP不存在用户: - -存在Doris账户:jack@'172.10.1.10',密码:123456 -使用Doris密码登录账户,成功: -``` -mysql -hDoris_HOST -PDoris_PORT -ujack -p 123456 -``` - -### LDAP组授权详解 - -DLAP用户dn是LDAP组节点的“member”属性则Doris认为用户属于该组。LDAP组授权是将LDAP中的group映射到Doris中的role,并将所有对应的role权限授予登录用户,用户退出登录后Doris会撤销对应的role权限。在使用LDAP组授权前应该在Doris中创建相应对role,并为role授权。 - -登录用户权限跟Doris用户和组权限有关,见下表: -|LDAP用户|Doris用户|登录用户的权限| -|--|--|--| -|存在|存在|LDAP组权限 + Doris用户权限| -|不存在|存在|Doris用户权限| -|存在|不存在|LDAP组权限| - -如果登录的用户为临时用户,且不存在组权限,则该用户默认具有information_schema的select_priv权限 - -举例: -LDAP用户dn是LDAP组节点的“member”属性则认为用户属于该组,Doris会截取组dn的第一个Rdn作为组名。 -例如用户dn为“uid=jack,ou=aidp,dc=domain,dc=com”, 组信息如下: -``` -dn: cn=doris_rd,ou=group,dc=domain,dc=com -objectClass: groupOfNames -member: uid=jack,ou=aidp,dc=domain,dc=com -``` -则组名为doris_rd。 - -假如jack还属于LDAP组doris_qa、doris_pm;Doris存在role:doris_rd、doris_qa、doris_pm,在使用LDAP验证登录后,用户不但具有该账户原有的权限,还将获得role doris_rd、doris_qa和doris_pm的权限。 - -## LDAP验证的局限 - -* 目前Doris的LDAP功能只支持明文密码验证,即用户登录时,密码在client与fe之间、fe与LDAP服务之间以明文的形式传输。 -* 当前的LDAP验证只支持在mysql协议下进行密码验证,如果使用Http接口则无法使用LDAP用户进行验证。 -* 临时用户不具有用户属性。 - diff --git a/docs/zh-CN/administrator-guide/load-data/batch-delete-manual.md b/docs/zh-CN/administrator-guide/load-data/batch-delete-manual.md deleted file mode 100644 index e86cc895b6..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/batch-delete-manual.md +++ /dev/null @@ -1,205 +0,0 @@ ---- -{ - "title": "批量删除", - "language": "zh-CN" -} ---- - - - -# 批量删除 -目前Doris 支持broker load, routine load, stream load 等多种导入方式,对于数据的删除目前只能通过delete 语句进行删除,使用delete 语句的方式删除时,每执行一次delete 都会生成一个新的数据版本,如果频繁删除会严重影响查询性能,并且在使用delete 方式删除时,是通过生成一个空的rowset来记录删除条件实现,每次读取都要对删除条件进行过滤,同样在条件较多时会对性能造成影响。对比其他的系统,greenplum 的实现方式更像是传统数据库产品,snowflake 通过merge 语法实现。 - -对于类似于cdc 数据的导入的场景,数据中insert 和delete 一般是穿插出现的,面对这种场景我们目前的导入方式也无法满足,即使我们能够分离出insert 和delete 虽然可以解决导入的问题,但是仍然解决不了删除的问题。使用批量删除功能可以解决这些个场景的需求。 -数据导入有三种合并方式: -1. APPEND: 数据全部追加到现有数据中 -2. DELETE: 删除所有与导入数据key 列值相同的行 -3. 
MERGE: 根据 DELETE ON 的决定 APPEND 还是 DELETE - -## 原理 -通过增加一个隐藏列`__DORIS_DELETE_SIGN__`实现,因为我们只是在unique 模型上做批量删除,因此只需要增加一个 类型为bool 聚合函数为replace 的隐藏列即可。在be 各种聚合写入流程都和正常列一样,读取方案有两个: - -在fe遇到 * 等扩展时去掉`__DORIS_DELETE_SIGN__`,并且默认加上 `__DORIS_DELETE_SIGN__ != true` 的条件 -be 读取时都会加上一列进行判断,通过条件确定是否删除。 - -### 导入 - -导入时在fe 解析时将隐藏列的值设置成 `DELETE ON` 表达式的值,其他的聚合行为和replace的聚合列相同 - -### 读取 - -读取时在所有存在隐藏列的olapScanNode上增加`__DORIS_DELETE_SIGN__ != true` 的条件,be 不感知这一过程,正常执行 - -### Cumulative Compaction - -Cumulative Compaction 时将隐藏列看作正常的列处理,Compaction逻辑没有变化 - -### Base Compaction - -Base Compaction 时要将标记为删除的行的删掉,以减少数据占用的空间 - -### 语法 -导入的语法设计方面主要是增加一个指定删除标记列的字段的column 映射,并且需要在导入数据中增加这一列,各个导入方式设置的方法如下 - -#### stream load - -stream load 的写法在在header 中的 columns 字段增加一个设置删除标记列的字段, 示例 -` -H "columns: k1, k2, label_c3" -H "merge_type: [MERGE|APPEND|DELETE]" -H "delete: label_c3=1"` - -#### broker load - -在`PROPERTIES ` 处设置删除标记列的字段 - -``` -LOAD LABEL db1.label1 -( - [MERGE|APPEND|DELETE] DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1") - INTO TABLE tbl1 - COLUMNS TERMINATED BY "," - (tmp_c1,tmp_c2, label_c3) - SET - ( - id=tmp_c2, - name=tmp_c1, - ) - [DELETE ON label=true] - -) -WITH BROKER 'broker' -( - "username"="user", - "password"="pass" -) -PROPERTIES -( - "timeout" = "3600" - -); - -``` - -#### routine load - -routine load 在`columns` 字段增加映射 映射方式同上,示例如下 - -``` - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - [WITH MERGE|APPEND|DELETE] - COLUMNS(k1, k2, k3, v1, v2, label), - WHERE k1 > 100 and k2 like "%doris%" - [DELETE ON label=true] - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,0,200" - ); -``` - -## 启用批量删除支持 -启用批量删除支持 有两种形式: -1. 通过在fe 配置文件中增加`enable_batch_delete_by_default=true` 重启fe 后新建表的都支持批量删除,此选项默认为false - -2. 对于没有更改上述fe 配置或对于以存在的不支持批量删除功能的表,可以使用如下语句: -`ALTER TABLE tablename ENABLE FEATURE "BATCH_DELETE"` 来启用批量删除。本操作本质上是一个schema change 操作,操作立即返回,可以通过`show alter table column` 来确认操作是否完成。 - -如果确定一个表是否支持批量删除,可以通过 设置一个session variable 来显示隐藏列 `SET show_hidden_columns=true` ,之后使用`desc tablename`,如果输出中有`__DORIS_DELETE_SIGN__` 列则支持,如果没有则不支持 - -## 注意 -1. 由于除stream load 外的导入操作在doris 内部有可能乱序执行,因此在使用`MERGE` 方式导入时如果不是stream load,需要与 load sequence 一起使用,具体的 语法可以参照sequence列 相关的文档 -2. `DELETE ON` 条件只能与 MERGE 一起使用 - -## 使用示例 -下面以stream load 为例 展示下使用方式 -1. 正常导入数据: -``` -curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: APPEND" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load -``` -其中的APPEND 条件可以省略,与下面的语句效果相同: -``` -curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load -``` -2. 
将与导入数据key 相同的数据全部删除 -``` -curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: DELETE" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load -``` -假设导入表中原有数据为: -``` -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 3 | 2 | tom | 2 | -| 4 | 3 | bush | 3 | -| 5 | 3 | helen | 3 | -+--------+----------+----------+------+ -``` -导入数据为: -``` -3,2,tom,0 -``` -导入后数据变成: -``` -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 4 | 3 | bush | 3 | -| 5 | 3 | helen | 3 | -+--------+----------+----------+------+ -``` -3. 将导入数据中与`site_id=1` 的行的key列相同的行 -``` -curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv" -H "merge_type: MERGE" -H "delete: siteid=1" -T ~/table1_data http://127.0.0.1:8130/api/test/table1/_stream_load -``` -假设导入前数据为: -``` -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 4 | 3 | bush | 3 | -| 5 | 3 | helen | 3 | -| 1 | 1 | jim | 2 | -+--------+----------+----------+------+ -``` - 导入数据为: -``` -2,1,grace,2 -3,2,tom,2 -1,1,jim,2 -``` -导入后为: -``` -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 4 | 3 | bush | 3 | -| 2 | 1 | grace | 2 | -| 3 | 2 | tom | 2 | -| 5 | 3 | helen | 3 | -+--------+----------+----------+------+ -``` \ No newline at end of file diff --git a/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md b/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md deleted file mode 100644 index 8862a0a113..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/binlog-load-manual.md +++ /dev/null @@ -1,502 +0,0 @@ ---- -{ - "title": "Binlog Load", - "language": "zh-CN" -} ---- - - - - -# Binlog Load -Binlog Load提供了一种使Doris增量同步用户在Mysql数据库的对数据更新操作的CDC(Change Data Capture)功能。 - -## 适用场景 - -* INSERT/UPDATE/DELETE支持 -* 过滤Query -* 暂不兼容DDL语句 - -## 名词解释 -1. Frontend(FE):Doris 系统的元数据和调度节点。在导入流程中主要负责导入 plan 生成和导入任务的调度工作。 -2. Backend(BE):Doris 系统的计算和存储节点。在导入流程中主要负责数据的 ETL 和存储。 -3. Canal:阿里巴巴开源的Mysql Binlog日志解析工具。提供增量数据订阅&消费等功能。 -4. Batch:Canal发送到客户端的一批数据,具有全局唯一自增的ID。 -5. SyncJob:用户提交的一个数据同步作业。 -6. Receiver: 负责订阅并接收Canal的数据。 -7. Consumer: 负责分发Receiver接收的数据到各个Channel。 -8. Channel: 接收Consumer分发的数据的渠道,创建发送数据的子任务,控制单个表事务的开启、提交、终止。 -9. 
Task:Channel向Be发送数据的子任务。 - -## 基本原理 -在第一期的设计中,Binlog Load需要依赖canal作为中间媒介,让canal伪造成一个从节点去获取Mysql主节点上的Binlog并解析,再由Doris去获取Canal上解析好的数据,主要涉及Mysql端、Canal端以及Doris端,总体数据流向如下: - -``` -+---------------------------------------------+ -| Mysql | -+----------------------+----------------------+ - | Binlog -+----------------------v----------------------+ -| Canal Server | -+-------------------+-----^-------------------+ - Get | | Ack -+-------------------|-----|-------------------+ -| FE | | | -| +-----------------|-----|----------------+ | -| | Sync Job | | | | -| | +------------v-----+-----------+ | | -| | | Canal Client | | | -| | | +-----------------------+ | | | -| | | | Receiver | | | | -| | | +-----------------------+ | | | -| | | +-----------------------+ | | | -| | | | Consumer | | | | -| | | +-----------------------+ | | | -| | +------------------------------+ | | -| +----+---------------+--------------+----+ | -| | | | | -| +----v-----+ +-----v----+ +-----v----+ | -| | Channel1 | | Channel2 | | Channel3 | | -| | [Table1] | | [Table2] | | [Table3] | | -| +----+-----+ +-----+----+ +-----+----+ | -| | | | | -| +--|-------+ +---|------+ +---|------+| -| +---v------+| +----v-----+| +----v-----+|| -| +----------+|+ +----------+|+ +----------+|+| -| | Task |+ | Task |+ | Task |+ | -| +----------+ +----------+ +----------+ | -+----------------------+----------------------+ - | | | -+----v-----------------v------------------v---+ -| Coordinator | -| BE | -+----+-----------------+------------------+---+ - | | | -+----v---+ +---v----+ +----v---+ -| BE | | BE | | BE | -+--------+ +--------+ +--------+ - -``` - -如上图,用户向FE提交一个数据同步作业。 - -FE会为每个数据同步作业启动一个canal client,来向canal server端订阅并获取数据。 - -client中的receiver将负责通过Get命令接收数据,每获取到一个数据batch,都会由consumer根据对应表分发到不同的channel,每个channel都会为此数据batch产生一个发送数据的子任务Task。 - -在FE上,一个Task是channel向BE发送数据的子任务,里面包含分发到当前channel的同一个batch的数据。 - -channel控制着单个表事务的开始、提交、终止。一个事务周期内,一般会从consumer获取到多个batch的数据,因此会产生多个向BE发送数据的子任务Task,在提交事务成功前,这些Task不会实际生效。 - -满足一定条件时(比如超过一定时间、达到提交最大数据大小),consumer将会阻塞并通知各个channel提交事务。 - -当且仅当所有channel都提交成功,才会通过Ack命令通知canal并继续获取并消费数据。 - -如果有任意channel提交失败,将会重新从上一次消费成功的位置获取数据并再次提交(已提交成功的channel不会再次提交以保证幂等性)。 - -整个数据同步作业中,FE通过以上流程不断的从canal获取数据并提交到BE,来完成数据同步。 - -## 配置Mysql端 - -在Mysql Cluster模式的主从同步中,二进制日志文件(Binlog)记录了主节点上的所有数据变化,数据在Cluster的多个节点间同步、备份都要通过Binlog日志进行,从而提高集群的可用性。架构通常由一个主节点(负责写)和一个或多个从节点(负责读)构成,所有在主节点上发生的数据变更将会复制给从节点。 - -**注意:目前必须要使用Mysql 5.7及以上的版本才能支持Binlog Load功能。** - -要打开mysql的二进制binlog日志功能,则需要编辑my.cnf配置文件设置一下。 - -``` -[mysqld] -log-bin = mysql-bin # 开启 binlog -binlog-format=ROW # 选择 ROW 模式 -``` - -### Mysql端说明 - -在Mysql上,Binlog命名格式为mysql-bin.000001、mysql-bin.000002... ,满足一定条件时mysql会去自动切分Binlog日志: - -1. mysql重启了 -2. 客户端输入命令flush logs -3. binlog文件大小超过1G - -要定位Binlog的最新的消费位置,可以通过binlog文件名和position(偏移量)。 - -例如,各个从节点上会保存当前消费到的binlog位置,方便随时断开连接、重新连接和继续消费。 - -``` ---------------------- --------------------- -| Slave | read | Master | -| FileName/Position | <<<--------------------------- | Binlog Files | ---------------------- --------------------- -``` - -对于主节点来说,它只负责写入Binlog,多个从节点可以同时连接到一台主节点上,消费Binlog日志的不同部分,互相之间不会影响。 - -Binlog日志支持两种主要格式(此外还有混合模式mixed-based): - -``` -statement-based格式: Binlog只保存主节点上执行的sql语句,从节点将其复制到本地重新执行 -row-based格式: Binlog会记录主节点的每一行所有列的数据的变更信息,从节点会复制并执行每一行的变更到本地 -``` - -第一种格式只写入了执行的sql语句,虽然日志量会很少,但是有下列缺点 - - 1. 没有保存每一行实际的数据 - 2. 在主节点上执行的UDF、随机、时间函数会在从节点上结果不一致 - 3. 
limit语句执行顺序可能不一致 - -因此我们需要选择第二种格式,才能从Binlog日志中解析出每一行数据。 - -在row-based格式下,Binlog会记录每一条binlog event的时间戳,server id,偏移量等信息,如下面一条带有两条insert语句的事务: - -``` -begin; -insert into canal_test.test_tbl values (3, 300); -insert into canal_test.test_tbl values (4, 400); -commit; -``` - -对应将会有四条binlog event,其中一条begin event,两条insert event,一条commit event: - -``` -SET TIMESTAMP=1538238301/*!*/; -BEGIN -/*!*/. -# at 211935643 -# at 211935698 -#180930 0:25:01 server id 1 end_log_pos 211935698 Table_map: 'canal_test'.'test_tbl' mapped to number 25 -#180930 0:25:01 server id 1 end_log_pos 211935744 Write_rows: table-id 25 flags: STMT_END_F -... -'/*!*/; -### INSERT INTO canal_test.test_tbl -### SET -### @1=1 -### @2=100 -# at 211935744 -#180930 0:25:01 server id 1 end_log_pos 211935771 Xid = 2681726641 -... -'/*!*/; -### INSERT INTO canal_test.test_tbl -### SET -### @1=2 -### @2=200 -# at 211935771 -#180930 0:25:01 server id 1 end_log_pos 211939510 Xid = 2681726641 -COMMIT/*!*/; -``` - -如上图所示,每条Insert event中包含了修改的数据。在进行Delete/Update操作时,一条event还能包含多行数据,使得Binlog日志更加的紧密。 - - - -### 开启GTID模式 [可选] -一个全局事务Id(global transaction identifier)标识出了一个曾在主节点上提交过的事务,在全局都是唯一有效的。开启了Binlog后,GTID会被写入到Binlog文件中,与事务一一对应。 - -要打开mysql的GTID模式,则需要编辑my.cnf配置文件设置一下 - -``` -gtid-mode=on // 开启gtid模式 -enforce-gtid-consistency=1 // 强制gtid和事务的一致性 -``` - -在GTID模式下,主服务器可以不需要Binlog的文件名和偏移量,就能很方便的追踪事务、恢复数据、复制副本。 - -在GTID模式下,由于GTID的全局有效性,从节点将不再需要通过保存文件名和偏移量来定位主节点上的Binlog位置,而通过数据本身就可以定位了。在进行数据同步中,从节点会跳过执行任意被识别为已执行的GTID事务。 - -GTID的表现形式为一对坐标, `source_id`标识出主节点,`transaction_id`表示此事务在主节点上执行的顺序(最大263-1)。 - -``` -GTID = source_id:transaction_id -``` - -例如,在同一主节点上执行的第23个事务的gtid为 - -``` -3E11FA47-71CA-11E1-9E33-C80AA9429562:23 -``` - -## 配置Canal端 -canal是属于阿里巴巴otter项目下的一个子项目,主要用途是基于 MySQL 数据库增量日志解析,提供增量数据订阅和消费,用于解决跨机房同步的业务场景,建议使用canal 1.1.5及以上版本,[下载地址](https://github.com/alibaba/canal/releases),下载完成后,请按以下步骤完成部署。 - -1. 解压canal deployer -2. 在conf文件夹下新建目录并重命名,作为instance的根目录,目录名即后文提到的destination -3. 修改instance配置文件(可拷贝conf/example/instance.properties) - - ``` - vim conf/{your destination}/instance.properties - ``` - ``` - ## canal instance serverId - canal.instance.mysql.slaveId = 1234 - ## mysql adress - canal.instance.master.address = 127.0.0.1:3306 - ## mysql username/password - canal.instance.dbUsername = canal - canal.instance.dbPassword = canal - ``` - -4. 启动 - - ``` - sh bin/startup.sh - ``` - -5. 验证启动成功 - - ``` - cat logs/{your destination}/{your destination}.log - ``` - ``` - 2013-02-05 22:50:45.636 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [canal.properties] - 2013-02-05 22:50:45.641 [main] INFO c.a.o.c.i.spring.support.PropertyPlaceholderConfigurer - Loading properties file from class path resource [xxx/instance.properties] - 2013-02-05 22:50:45.803 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start CannalInstance for 1-xxx - 2013-02-05 22:50:45.810 [main] INFO c.a.otter.canal.instance.spring.CanalInstanceWithSpring - start successful.... 
- ``` - -### canal端说明 - -canal通过伪造自己的mysql dump协议,去伪装成一个从节点,获取主节点的Binlog日志并解析。 - -canal server上可启动多个instance,一个instance可看作一个从节点,每个instance由下面几个部分组成: - -``` -------------------------------------------------- -| Server | -| -------------------------------------------- | -| | Instance 1 | | -| | ----------- ----------- ----------- | | -| | | Parser | | Sink | | Store | | | -| | ----------- ----------- ----------- | | -| | ----------------------------------- | | -| | | MetaManager | | | -| | ----------------------------------- | | -| -------------------------------------------- | -------------------------------------------------- -``` - -* parser:数据源接入,模拟slave协议和master进行交互,协议解析 -* sink:parser和store链接器,进行数据过滤,加工,分发的工作 -* store:数据存储 -* meta manager:元数据管理模块 - -每个instance都有自己在cluster内的唯一标识,即server Id。 - -在canal server内,instance用字符串表示,此唯一字符串被记为destination,canal client需要通过destination连接到对应的instance。 - -**注意:canal client和canal instance是一一对应的**,Binlog Load已限制多个数据同步作业不能连接到同一个destination。 - -数据在instance内的流向是binlog -> parser -> sink -> store。 - -instance通过parser模块解析binlog日志,解析出来的数据缓存在store里面,当用户向FE提交一个数据同步作业时,会启动一个canal client订阅并获取对应instance中的store内的数据。 - -store实际上是一个环形的队列,用户可以自行配置它的长度和存储空间。 - -![store](/images/canal_store.png) - -store通过三个指针去管理队列内的数据: - -1. get指针:get指针代表客户端最后获取到的位置。 -2. ack指针:ack指针记录着最后消费成功的位置。 -3. put指针:代表sink模块最后写入store成功的位置。 - -``` -canal client异步获取store中数据 - - get 0 get 1 get 2 put - | | | ...... | - v v v v ---------------------------------------------------------------------- store环形队列 - ^ ^ - | | - ack 0 ack 1 -``` - -canal client调用get命令时,canal server会产生数据batch发送给client,并右移get指针,client可以获取多个batch,直到get指针赶上put指针为止。 - -当消费数据成功时,client会返回ack + batch Id通知已消费成功了,并右移ack指针,store会从队列中删除此batch的数据,腾出空间来从上游sink模块获取数据,并右移put指针。 - -当数据消费失败时,client会返回rollback通知消费失败,store会将get指针重置左移到ack指针位置,使下一次client获取的数据能再次从ack指针处开始。 - -和Mysql中的从节点一样,canal也需要去保存client最新消费到的位置。canal中所有元数据(如GTID、Binlog位置)都是由MetaManager去管理的,目前元数据默认以json格式持久化在instance根目录下的meta.dat文件内。 - -## 基本操作 - -### 配置目标表属性 - -用户需要先在Doris端创建好与Mysql端对应的目标表 - -Binlog Load只能支持Unique类型的目标表,且必须激活目标表的Batch Delete功能。 - -开启Batch Delete的方法可以参考`help alter table`中的批量删除功能。 - -示例: - -``` --- create target table -CREATE TABLE `test1` ( - `a` int(11) NOT NULL COMMENT "", - `b` int(11) NOT NULL COMMENT "" -) ENGINE=OLAP -UNIQUE KEY(`a`) -COMMENT "OLAP" -DISTRIBUTED BY HASH(`a`) BUCKETS 8; - --- enable batch delete -ALTER TABLE canal_test.test1 ENABLE FEATURE "BATCH_DELETE"; -``` - -### 创建同步作业 -创建数据同步作业的的详细语法可以连接到 Doris 后,执行 HELP CREATE SYNC JOB; 查看语法帮助。这里主要详细介绍,创建作业时的注意事项。 - -* job_name - - `job_name`是数据同步作业在当前数据库内的唯一标识,相同`job_name`的作业只能有一个在运行。 - -* channel_desc - - `channel_desc `用来定义任务下的数据通道,可表示mysql源表到doris目标表的映射关系。在设置此项时,如果存在多个映射关系,必须满足mysql源表应该与doris目标表是一一对应关系,其他的任何映射关系(如一对多关系),检查语法时都被视为不合法。 - -* column_mapping - - `column_mapping`主要指mysql源表和doris目标表的列之间的映射关系,如果不指定,FE会默认源表和目标表的列按顺序一一对应。但是我们依然建议显式的指定列的映射关系,这样当目标表的结构发生变化(比如增加一个 nullable 的列),数据同步作业依然可以进行。否则,当发生上述变动后,因为列映射关系不再一一对应,导入将报错。 - -* binlog_desc - - `binlog_desc`中的属性定义了对接远端Binlog地址的一些必要信息,目前可支持的对接类型只有canal方式,所有的配置项前都需要加上canal前缀。 - - 1. `canal.server.ip`: canal server的地址 - 2. `canal.server.port`: canal server的端口 - 3. `canal.destination`: 前文提到的instance的字符串标识 - 4. `canal.batchSize`: 每批从canal server处获取的batch大小的最大值,默认8192 - 5. `canal.username`: instance的用户名 - 6. `canal.password`: instance的密码 - 7. 
`canal.debug`: 设置为true时,会将batch和每一行数据的详细信息都打印出来,会影响性能。 - -### 查看作业状态 - -查看作业状态的具体命令和示例可以通过 `HELP SHOW SYNC JOB;` 命令查看。 - -返回结果集的参数意义如下: - -* State - - 作业当前所处的阶段。作业状态之间的转换如下图所示: - - ``` - +-------------+ - create job | PENDING | resume job - +-----------+ <-------------+ - | +-------------+ | - +----v-------+ +-------+----+ - | RUNNING | pause job | PAUSED | - | +-----------------------> | - +----+-------+ run error +-------+----+ - | +-------------+ | - | | CANCELLED | | - +-----------> <-------------+ - stop job +-------------+ stop job - system error - ``` - 作业提交之后状态为PENDING,由FE调度执行启动canal client后状态变成RUNNING,用户可以通过 STOP/PAUSE/RESUME 三个命令来控制作业的停止,暂停和恢复,操作后作业状态分别为CANCELLED/PAUSED/RUNNING。 - - 作业的最终阶段只有一个CANCELLED,当作业状态变为CANCELLED后,将无法再次恢复。当作业发生了错误时,若错误是不可恢复的,状态会变成CANCELLED,否则会变成PAUSED。 - -* Channel - - 作业所有源表到目标表的映射关系。 - -* Status - - 当前binlog的消费位置(若设置了GTID模式,会显示GTID),以及doris端执行时间相比mysql端的延迟时间。 - -* JobConfig - - 对接的远端服务器信息,如canal server的地址与连接instance的destination - -### 控制作业 -用户可以通过 STOP/PAUSE/RESUME 三个命令来控制作业的停止,暂停和恢复。可以通过`HELP STOP SYNC JOB`; `HELP PAUSE SYNC JOB`; 以及 `HELP RESUME SYNC JOB`; 三个命令查看帮助和示例。 - -## 相关参数 - -### CANAL配置 - -下面配置属于canal端的配置,主要通过修改 conf 目录下的 canal.properties 调整配置值。 - -* `canal.ip` - - canal server的ip地址 - -* `canal.port` - - canal server的端口 - -* `canal.instance.memory.buffer.size` - - canal端的store环形队列的队列长度,必须设为2的幂次方,默认长度16384。此值等于canal端能缓存event数量的最大值,也直接决定了Doris端一个事务内所能容纳的最大event数量。建议将它改的足够大,防止Doris端一个事务内能容纳的数据量上限太小,导致提交事务太过频繁造成数据的版本堆积。 - -* `canal.instance.memory.buffer.memunit` - - canal端默认一个event所占的空间,默认空间为1024 bytes。此值乘上store环形队列的队列长度等于store的空间最大值,比如store队列长度为16384,则store的空间为16MB。但是,一个event的实际大小并不等于此值,而是由这个event内有多少行数据和每行数据的长度决定的,比如一张只有两列的表的insert event只有30字节,但delete event可能达到数千字节,这是因为通常delete event的行数比insert event多。 - -### FE配置 - -下面配置属于数据同步作业的系统级别配置,主要通过修改 fe.conf 来调整配置值。 - -* `enable_create_sync_job` - - 开启数据同步作业功能。默认为 false,关闭此功能。 - -* `sync_commit_interval_second` - - 提交事务的最大时间间隔。若超过了这个时间channel中还有数据没有提交,consumer会通知channel提交事务。 - -* `min_sync_commit_size` - - 提交事务需满足的最小event数量。若Fe接收到的event数量小于它,会继续等待下一批数据直到时间超过了`sync_commit_interval_second `为止。默认值是10000个events,如果你想修改此配置,请确保此值小于canal端的`canal.instance.memory.buffer.size`配置(默认16384),否则在ack前Fe会尝试获取比store队列长度更多的event,导致store队列阻塞至超时为止。 - -* `min_bytes_sync_commit` - - 提交事务需满足的最小数据大小。若Fe接收到的数据大小小于它,会继续等待下一批数据直到时间超过了`sync_commit_interval_second `为止。默认值是15MB,如果你想修改此配置,请确保此值小于canal端的`canal.instance.memory.buffer.size`和`canal.instance.memory.buffer.memunit`的乘积(默认16MB),否则在ack前Fe会尝试获取比store空间更大的数据,导致store队列阻塞至超时为止。 - -* `max_bytes_sync_commit` - - 提交事务时的数据大小的最大值。若Fe接收到的数据大小大于它,会立即提交事务并发送已积累的数据。默认值是64MB,如果你想修改此配置,请确保此值大于canal端的`canal.instance.memory.buffer.size`和`canal.instance.memory.buffer.memunit`的乘积(默认16MB)和`min_bytes_sync_commit`。 - -* `max_sync_task_threads_num` - - 数据同步作业线程池中的最大线程数量。此线程池整个FE中只有一个,用于处理FE中所有数据同步作业向BE发送数据的任务task,线程池的实现在`SyncTaskPool`类。 - -## 常见问题 - -1. 修改表结构是否会影响数据同步作业? - - 会影响。数据同步作业并不能禁止`alter table`的操作,当表结构发生了变化,如果列的映射无法匹配,可能导致作业发生错误暂停,建议通过在数据同步作业中显式指定列映射关系,或者通过增加 Nullable 列或带 Default 值的列来减少这类问题。 - -2. 删除了数据库后数据同步作业还会继续运行吗? - - 不会。删除数据库后的几秒日志中可能会出现找不到元数据的错误,之后该数据同步作业会被FE的定时调度检查时停止。 - -3. 多个数据同步作业可以配置相同的`ip:port + destination`吗? - - 不能。创建数据同步作业时会检查`ip:port + destination`与已存在的作业是否重复,防止出现多个作业连接到同一个instance的情况。 - -4. 为什么数据同步时浮点类型的数据精度在Mysql端和Doris端不一样? 
- - Doris本身浮点类型的精度与Mysql不一样。可以选择用Decimal类型代替。 - \ No newline at end of file diff --git a/docs/zh-CN/administrator-guide/load-data/broker-load-manual.md b/docs/zh-CN/administrator-guide/load-data/broker-load-manual.md deleted file mode 100644 index 9c7ea16401..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/broker-load-manual.md +++ /dev/null @@ -1,544 +0,0 @@ ---- -{ - "title": "Broker Load", - "language": "zh-CN" -} ---- - - - -# Broker Load - -Broker load 是一个异步的导入方式,支持的数据源取决于 Broker 进程支持的数据源。 - -用户需要通过 MySQL 协议 创建 Broker load 导入,并通过查看导入命令检查导入结果。 - -## 适用场景 - -* 源数据在 Broker 可以访问的存储系统中,如 HDFS。 -* 数据量在 几十到百GB 级别。 - -## 名词解释 - -1. Frontend(FE):Doris 系统的元数据和调度节点。在导入流程中主要负责导入 plan 生成和导入任务的调度工作。 -2. Backend(BE):Doris 系统的计算和存储节点。在导入流程中主要负责数据的 ETL 和存储。 -3. Broker:Broker 为一个独立的无状态进程。封装了文件系统接口,提供 Doris 读取远端存储系统中文件的能力。 -4. Plan:导入执行计划,BE 会执行导入执行计划将数据导入到 Doris 系统中。 - -## 基本原理 - -用户在提交导入任务后,FE 会生成对应的 Plan 并根据目前 BE 的个数和文件的大小,将 Plan 分给 多个 BE 执行,每个 BE 执行一部分导入数据。 - -BE 在执行的过程中会从 Broker 拉取数据,在对数据 transform 之后将数据导入系统。所有 BE 均完成导入,由 FE 最终决定导入是否成功。 - -``` - + - | 1. user create broker load - v - +----+----+ - | | - | FE | - | | - +----+----+ - | - | 2. BE etl and load the data - +--------------------------+ - | | | -+---v---+ +--v----+ +---v---+ -| | | | | | -| BE | | BE | | BE | -| | | | | | -+---+-^-+ +---+-^-+ +--+-^--+ - | | | | | | - | | | | | | 3. pull data from broker -+---v-+-+ +---v-+-+ +--v-+--+ -| | | | | | -|Broker | |Broker | |Broker | -| | | | | | -+---+-^-+ +---+-^-+ +---+-^-+ - | | | | | | -+---v-+-----------v-+----------v-+-+ -| HDFS/BOS/AFS cluster | -| | -+----------------------------------+ - -``` - -## 基本操作 - -### 创建导入 - -Broker load 创建导入语句 - -语法: - -``` -LOAD LABEL db_name.label_name -(data_desc, ...) -WITH BROKER broker_name broker_properties -[PROPERTIES (key1=value1, ... )] - -* data_desc: - - DATA INFILE ('file_path', ...) - [NEGATIVE] - INTO TABLE tbl_name - [PARTITION (p1, p2)] - [COLUMNS TERMINATED BY separator ] - [(col1, ...)] - [PRECEDING FILTER predicate] - [SET (k1=f1(xx), k2=f2(xx))] - [WHERE predicate] - -* broker_properties: - - (key1=value1, ...) 
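
* opt_properties(即语句末尾的 PROPERTIES 子句):

    (key1=value1, ...)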
-``` -示例: - -``` -LOAD LABEL db1.label1 -( - DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1") - INTO TABLE tbl1 - COLUMNS TERMINATED BY "," - (tmp_c1,tmp_c2) - SET - ( - id=tmp_c2, - name=tmp_c1 - ), - DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2") - INTO TABLE tbl2 - COLUMNS TERMINATED BY "," - (col1, col2) - where col1 > 1 -) -WITH BROKER 'broker' -( - "username"="user", - "password"="pass" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -创建导入的详细语法执行 ```HELP BROKER LOAD``` 查看语法帮助。这里主要介绍 Broker load 的创建导入语法中参数意义和注意事项。 - -#### Label - -导入任务的标识。每个导入任务,都有一个在单 database 内部唯一的 Label。Label 是用户在导入命令中自定义的名称。通过这个 Label,用户可以查看对应导入任务的执行情况。 - -Label 的另一个作用,是防止用户重复导入相同的数据。**强烈推荐用户同一批次数据使用相同的label。这样同一批次数据的重复请求只会被接受一次,保证了 At-Most-Once 语义** - -当 Label 对应的导入作业状态为 CANCELLED 时,可以再次使用该 Label 提交导入作业。 - -#### 数据描述类参数 - -数据描述类参数主要指的是 Broker load 创建导入语句中的属于 ```data_desc``` 部分的参数。每组 ```data_desc ``` 主要表述了本次导入涉及到的数据源地址,ETL 函数,目标表及分区等信息。 - -下面主要对数据描述类的部分参数详细解释: - -+ 多表导入 - - Broker load 支持一次导入任务涉及多张表,每个 Broker load 导入任务可在多个 ``` data_desc ``` 声明多张表来实现多表导入。每个单独的 ```data_desc``` 还可以指定属于该表的数据源地址。Broker load 保证了单次导入的多张表之间原子性成功或失败。 - -+ negative - - ```data_desc```中还可以设置数据取反导入。这个功能主要用于,当数据表中聚合列的类型都为 SUM 类型时。如果希望撤销某一批导入的数据。则可以通过 `negative` 参数导入同一批数据。Doris 会自动为这一批数据在聚合列上数据取反,以达到消除同一批数据的功能。 - -+ partition - - 在 ```data_desc``` 中可以指定待导入表的 partition 信息,如果待导入数据不属于指定的 partition 则不会被导入。同时,不在指定 Partition 的数据会被认为是错误数据。 - -+ set column mapping - - 在 ```data_desc``` 中的 SET 语句负责设置列函数变换,这里的列函数变换支持所有查询的等值表达式变换。如果原始数据的列和表中的列不一一对应,就需要用到这个属性。 - -+ preceding filter predicate - - 用于过滤原始数据。原始数据是未经列映射、转换的数据。用户可以在对转换前的数据前进行一次过滤,选取期望的数据,再进行转换。 - -+ where predicate - - 在 ```data_desc``` 中的 WHERE 语句中负责过滤已经完成 transform 的数据,被 filter 的数据不会进入容忍率的统计中。如果多个 data_desc 中声明了同一张表的多个条件的话,则会 merge 同一张表的多个条件,merge 策略是 AND 。 - -#### 导入作业参数 - -导入作业参数主要指的是 Broker load 创建导入语句中的属于 ```opt_properties```部分的参数。导入作业参数是作用于整个导入作业的。 - -下面主要对导入作业参数的部分参数详细解释: - -+ timeout - - 导入作业的超时时间(以秒为单位),用户可以在 ```opt_properties``` 中自行设置每个导入的超时时间。导入任务在设定的 timeout 时间内未完成则会被系统取消,变成 CANCELLED。Broker load 的默认导入超时时间为4小时。 - - 通常情况下,用户不需要手动设置导入任务的超时时间。当在默认超时时间内无法完成导入时,可以手动设置任务的超时时间。 - - > 推荐超时时间 - > - > 总文件大小(MB) / 用户 Doris 集群最慢导入速度(MB/s) > timeout > ((总文件大小(MB) * 待导入的表及相关 Roll up 表的个数) / (10 * 导入并发数) ) - - > 导入并发数见文档最后的导入系统配置说明,公式中的 10 为目前的导入限速 10MB/s。 - - > 例如一个 1G 的待导入数据,待导入表包含3个 Rollup 表,当前的导入并发数为 3。则 timeout 的 最小值为 ```(1 * 1024 * 3 ) / (10 * 3) = 102 秒``` - - 由于每个 Doris 集群的机器环境不同且集群并发的查询任务也不同,所以用户 Doris 集群的最慢导入速度需要用户自己根据历史的导入任务速度进行推测。 - -+ max\_filter\_ratio - - 导入任务的最大容忍率,默认为0容忍,取值范围是0~1。当导入的错误率超过该值,则导入失败。 - - 如果用户希望忽略错误的行,可以通过设置这个参数大于 0,来保证导入可以成功。 - - 计算公式为: - - ``` max_filter_ratio = (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) ``` - - ```dpp.abnorm.ALL``` 表示数据质量不合格的行数。如类型不匹配,列数不匹配,长度不匹配等等。 - - ```dpp.norm.ALL``` 指的是导入过程中正确数据的条数。可以通过 ```SHOW LOAD``` 命令查询导入任务的正确数据量。 - - 原始文件的行数 = `dpp.abnorm.ALL + dpp.norm.ALL` - -+ exec\_mem\_limit - - 导入内存限制。默认是 2GB。单位为字节。 - -+ strict\_mode - - Broker load 导入可以开启 strict mode 模式。开启方式为 ```properties ("strict_mode" = "true")``` 。默认的 strict mode 为关闭。 - - strict mode 模式的意思是:对于导入过程中的列类型转换进行严格过滤。严格过滤的策略如下: - - 1. 对于列类型转换来说,如果 strict mode 为true,则错误的数据将被 filter。这里的错误数据是指:原始数据并不为空值,在参与列类型转换后结果为空值的这一类数据。 - - 2. 对于导入的某列由函数变换生成时,strict mode 对其不产生影响。 - - 3. 
对于导入的某列类型包含范围限制的,如果原始数据能正常通过类型转换,但无法通过范围限制的,strict mode 对其也不产生影响。例如:如果类型是 decimal(1,0), 原始数据为 10,则属于可以通过类型转换但不在列声明的范围内。这种数据 strict 对其不产生影响。 -+ merge\_type - 数据的合并类型,一共支持三种类型APPEND、DELETE、MERGE 其中,APPEND是默认值,表示这批数据全部需要追加到现有数据中,DELETE 表示删除与这批数据key相同的所有行,MERGE 语义 需要与delete 条件联合使用,表示满足delete 条件的数据按照DELETE 语义处理其余的按照APPEND 语义处理 - -#### strict mode 与 source data 的导入关系 - -这里以列类型为 TinyInt 来举例 - ->注:当表中的列允许导入空值时 - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|---------| -|空值 | \N | N/A | true or false | NULL| -|not null | aaa or 2000 | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 | 1 | true or false | correct data| - -这里以列类型为 Decimal(1,0) 举例 - ->注:当表中的列允许导入空值时 - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|--------| -|空值 | \N | N/A | true or false | NULL| -|not null | aaa | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 or 10 | 1 | true or false | correct data| - -> 注意:10 虽然是一个超过范围的值,但是因为其类型符合 decimal的要求,所以 strict mode对其不产生影响。10 最后会在其他 ETL 处理流程中被过滤。但不会被 strict mode 过滤。 - -#### Broker 参数 - -Broker Load 需要借助 Broker 进程访问远端存储,不同的 Broker 需要提供不同的参数,具体请参阅 [Broker文档](../broker.md) - -### 查看导入 - -Broker load 导入方式由于是异步的,所以用户必须将创建导入的 Label 记录,并且在**查看导入命令中使用 Label 来查看导入结果**。查看导入命令在所有导入方式中是通用的,具体语法可执行 ```HELP SHOW LOAD``` 查看。 - -示例: - -``` -mysql> show load order by createtime desc limit 1\G -*************************** 1. row *************************** - JobId: 76391 - Label: label1 - State: FINISHED - Progress: ETL:100%; LOAD:100% - Type: BROKER - EtlInfo: unselected.rows=4; dpp.abnorm.ALL=15; dpp.norm.ALL=28133376 - TaskInfo: cluster:N/A; timeout(s):10800; max_filter_ratio:5.0E-5 - ErrorMsg: N/A - CreateTime: 2019-07-27 11:46:42 - EtlStartTime: 2019-07-27 11:46:44 - EtlFinishTime: 2019-07-27 11:46:44 - LoadStartTime: 2019-07-27 11:46:44 -LoadFinishTime: 2019-07-27 11:50:16 - URL: http://192.168.1.1:8040/api/_load_error_log?file=__shard_4/error_log_insert_stmt_4bb00753932c491a-a6da6e2725415317_4bb00753932c491a_a6da6e2725415317 - JobDetails: {"Unfinished backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"ScannedRows":2390016,"TaskNumber":1,"All backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"FileNumber":1,"FileSize":1073741824} -``` - -下面主要介绍了查看导入命令返回结果集中参数意义: - -+ JobId - - 导入任务的唯一ID,每个导入任务的 JobId 都不同,由系统自动生成。与 Label 不同的是,JobId永远不会相同,而 Label 则可以在导入任务失败后被复用。 - -+ Label - - 导入任务的标识。 - -+ State - - 导入任务当前所处的阶段。在 Broker load 导入过程中主要会出现 PENDING 和 LOADING 这两个导入中的状态。如果 Broker load 处于 PENDING 状态,则说明当前导入任务正在等待被执行;LOADING 状态则表示正在执行中。 - - 导入任务的最终阶段有两个:CANCELLED 和 FINISHED,当 Load job 处于这两个阶段时,导入完成。其中 CANCELLED 为导入失败,FINISHED 为导入成功。 - -+ Progress - - 导入任务的进度描述。分为两种进度:ETL 和 LOAD,对应了导入流程的两个阶段 ETL 和 LOADING。目前 Broker load 由于只有 LOADING 阶段,所以 ETL 则会永远显示为 `100%` - - LOAD 的进度范围为:0~100%。 - - ```LOAD 进度 = 当前完成导入的表个数 / 本次导入任务设计的总表个数 * 100%``` - - **如果所有导入表均完成导入,此时 LOAD 的进度为 99%** 导入进入到最后生效阶段,整个导入完成后,LOAD 的进度才会改为 100%。 - - 导入进度并不是线性的。所以如果一段时间内进度没有变化,并不代表导入没有在执行。 - -+ Type - - 导入任务的类型。Broker load 的 type 取值只有 BROKER。 - -+ EtlInfo - - 主要显示了导入的数据量指标 ```unselected.rows``` , ```dpp.norm.ALL``` 和 ```dpp.abnorm.ALL```。用户可以根据第一个数值判断 where 条件过滤了多少行,后两个指标验证当前导入任务的错误率是否超过 ```max_filter_ratio```。 - - 三个指标之和就是原始数据量的总行数。 - -+ TaskInfo - - 主要显示了当前导入任务参数,也就是创建 Broker load 导入任务时用户指定的导入任务参数,包括:`cluster`,`timeout` 和`max_filter_ratio`。 - 
-+ ErrorMsg - - 在导入任务状态为CANCELLED,会显示失败的原因,显示分两部分:type 和 msg,如果导入任务成功则显示 ```N/A```。 - - type的取值意义: - - ``` - USER_CANCEL: 用户取消的任务 - ETL_RUN_FAIL:在ETL阶段失败的导入任务 - ETL_QUALITY_UNSATISFIED:数据质量不合格,也就是错误数据率超过了 max_filter_ratio - LOAD_RUN_FAIL:在LOADING阶段失败的导入任务 - TIMEOUT:导入任务没在超时时间内完成 - UNKNOWN:未知的导入错误 - ``` - -+ CreateTime/EtlStartTime/EtlFinishTime/LoadStartTime/LoadFinishTime - - 这几个值分别代表导入创建的时间,ETL阶段开始的时间,ETL阶段完成的时间,Loading阶段开始的时间和整个导入任务完成的时间。 - - Broker load 导入由于没有 ETL 阶段,所以其 EtlStartTime, EtlFinishTime, LoadStartTime 被设置为同一个值。 - - 导入任务长时间停留在 CreateTime,而 LoadStartTime 为 N/A 则说明目前导入任务堆积严重。用户可减少导入提交的频率。 - - ``` - LoadFinishTime - CreateTime = 整个导入任务所消耗时间 - LoadFinishTime - LoadStartTime = 整个 Broker load 导入任务执行时间 = 整个导入任务所消耗时间 - 导入任务等待的时间 - ``` - -+ URL - - 导入任务的错误数据样例,访问 URL 地址既可获取本次导入的错误数据样例。当本次导入不存在错误数据时,URL 字段则为 N/A。 - -+ JobDetails - - 显示一些作业的详细运行状态。包括导入文件的个数、总大小(字节)、子任务个数、已处理的原始行数,运行子任务的 BE 节点 Id,未完成的 BE 节点 Id。 - - ``` - {"Unfinished backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"ScannedRows":2390016,"TaskNumber":1,"All backends":{"9c3441027ff948a0-8287923329a2b6a7":[10002]},"FileNumber":1,"FileSize":1073741824} - ``` - - 其中已处理的原始行数,每 5 秒更新一次。该行数仅用于展示当前的进度,不代表最终实际的处理行数。实际处理行数以 EtlInfo 中显示的为准。 - -### 取消导入 - -当 Broker load 作业状态不为 CANCELLED 或 FINISHED 时,可以被用户手动取消。取消时需要指定待取消导入任务的 Label 。取消导入命令语法可执行 ```HELP CANCEL LOAD```查看。 - -## 相关系统配置 - -### FE 配置 - -下面几个配置属于 Broker load 的系统级别配置,也就是作用于所有 Broker load 导入任务的配置。主要通过修改 ``` fe.conf```来调整配置值。 - -+ min\_bytes\_per\_broker\_scanner/max\_bytes\_per\_broker\_scanner/max\_broker\_concurrency - - 前两个配置限制了单个 BE 处理的数据量的最小和最大值。第三个配置限制了一个作业的最大的导入并发数。最小处理的数据量,最大并发数,源文件的大小和当前集群 BE 的个数 **共同决定了本次导入的并发数**。 - - ``` - 本次导入并发数 = Math.min(源文件大小/最小处理量,最大并发数,当前BE节点个数) - 本次导入单个BE的处理量 = 源文件大小/本次导入的并发数 - ``` - - 通常一个导入作业支持的最大数据量为 `max_bytes_per_broker_scanner * BE 节点数`。如果需要导入更大数据量,则需要适当调整 `max_bytes_per_broker_scanner` 参数的大小。 - - 默认配置: - - ``` - 参数名:min_bytes_per_broker_scanner, 默认 64MB,单位bytes。 - 参数名:max_broker_concurrency, 默认 10。 - 参数名:max_bytes_per_broker_scanner,默认 3G,单位bytes。 - ``` - -## 最佳实践 - -### 应用场景 - -使用 Broker load 最适合的场景就是原始数据在文件系统(HDFS,BOS,AFS)中的场景。其次,由于 Broker load 是单次导入中唯一的一种异步导入的方式,所以如果用户在导入大文件中,需要使用异步接入,也可以考虑使用 Broker load。 - -### 数据量 - -这里仅讨论单个 BE 的情况,如果用户集群有多个 BE 则下面标题中的数据量应该乘以 BE 个数来计算。比如:如果用户有3个 BE,则 3G 以下(包含)则应该乘以 3,也就是 9G 以下(包含)。 - -+ 3G 以下(包含) - - 用户可以直接提交 Broker load 创建导入请求。 - -+ 3G 以上 - - 由于单个导入 BE 最大的处理量为 3G,超过 3G 的待导入文件就需要通过调整 Broker load 的导入参数来实现大文件的导入。 - - 1. 根据当前 BE 的个数和原始文件的大小修改单个 BE 的最大扫描量和最大并发数。 - - ``` - 修改 fe.conf 中配置 - - max_broker_concurrency = BE 个数 - 当前导入任务单个 BE 处理的数据量 = 原始文件大小 / max_broker_concurrency - max_bytes_per_broker_scanner >= 当前导入任务单个 BE 处理的数据量 - - 比如一个 100G 的文件,集群的 BE 个数为 10 个 - max_broker_concurrency = 10 - max_bytes_per_broker_scanner >= 10G = 100G / 10 - - ``` - - 修改后,所有的 BE 会并发的处理导入任务,每个 BE 处理原始文件的一部分。 - - *注意:上述两个 FE 中的配置均为系统配置,也就是说其修改是作用于所有的 Broker load的任务的。* - - 2. 在创建导入的时候自定义当前导入任务的 timeout 时间 - - ``` - 当前导入任务单个 BE 处理的数据量 / 用户 Doris 集群最慢导入速度(MB/s) >= 当前导入任务的 timeout 时间 >= 当前导入任务单个 BE 处理的数据量 / 10M/s - - 比如一个 100G 的文件,集群的 BE 个数为 10个 - timeout >= 1000s = 10G / 10M/s - - ``` - - 3. 
当用户发现第二步计算出的 timeout 时间超过系统默认的导入最大超时时间 4小时 - - 这时候不推荐用户将导入最大超时时间直接改大来解决问题。单个导入时间如果超过默认的导入最大超时时间4小时,最好是通过切分待导入文件并且分多次导入来解决问题。主要原因是:单次导入超过4小时的话,导入失败后重试的时间成本很高。 - - 可以通过如下公式计算出 Doris 集群期望最大导入文件数据量: - - ``` - 期望最大导入文件数据量 = 14400s * 10M/s * BE 个数 - 比如:集群的 BE 个数为 10个 - 期望最大导入文件数据量 = 14400s * 10M/s * 10 = 1440000M ≈ 1440G - - 注意:一般用户的环境可能达不到 10M/s 的速度,所以建议超过 500G 的文件都进行文件切分,再导入。 - - ``` - -### 作业调度 - -系统会限制一个集群内,正在运行的 Broker Load 作业数量,以防止同时运行过多的 Load 作业。 - -首先, FE 的配置参数:`desired_max_waiting_jobs` 会限制一个集群内,未开始或正在运行(作业状态为 PENDING 或 LOADING)的 Broker Load 作业数量。默认为 100。如果超过这个阈值,新提交的作业将会被直接拒绝。 - -一个 Broker Load 作业会被分为 pending task 和 loading task 阶段。其中 pending task 负责获取导入文件的信息,而 loading task 会发送给BE执行具体的导入任务。 - -FE 的配置参数 `async_pending_load_task_pool_size` 用于限制同时运行的 pending task 的任务数量。也相当于控制了实际正在运行的导入任务数量。该参数默认为 10。也就是说,假设用户提交了100个Load作业,同时只会有10个作业会进入 LOADING 状态开始执行,而其他作业处于 PENDING 等待状态。 - -FE 的配置参数 `async_loading_load_task_pool_size` 用于限制同时运行的 loading task 的任务数量。一个 Broker Load 作业会有 1 个 pending task 和多个 loading task (等于 LOAD 语句中 DATA INFILE 子句的个数)。所以 `async_loading_load_task_pool_size` 应该大于等于 `async_pending_load_task_pool_size`。 - -### 性能分析 - -可以在提交 LOAD 作业前,先执行 `set enable_profile=true` 打开会话变量。然后提交导入作业。待导入作业完成后,可以在 FE 的 web 页面的 `Queris` 标签中查看到导入作业的 Profile。 - -这个 Profile 可以帮助分析导入作业的运行状态。 - -当前只有作业成功执行后,才能查看 Profile。 - -### 完整例子 - -数据情况:用户数据在 HDFS 中,文件地址为 hdfs://abc.com:8888/store_sales, hdfs 的认证用户名为 root, 密码为 password, 数据量大小约为 30G,希望导入到数据库 bj_sales 的表 store_sales 中。 - -集群情况:集群的 BE 个数约为 3 个,Broker 名称均为 broker。 - -+ step1: 经过上述方法的计算,本次导入的单个 BE 导入量为 10G,则需要先修改 FE 的配置,将单个 BE 导入最大量修改为: - - ``` - max_bytes_per_broker_scanner = 10737418240 - - ``` - -+ step2: 经计算,本次导入的时间大约为 1000s,并未超过默认超时时间,可不配置导入自定义超时时间。 - -+ step3:创建导入语句 - - ``` - LOAD LABEL bj_sales.store_sales_broker_load_01 - ( - DATA INFILE("hdfs://abc.com:8888/store_sales") - INTO TABLE store_sales - ) - WITH BROKER 'broker' - ("username"="root", "password"="password"); - ``` - -## 常见问题 - -* 导入报错:`Scan bytes per broker scanner exceed limit:xxx` - - 请参照文档中最佳实践部分,修改 FE 配置项 `max_bytes_per_broker_scanner` 和 `max_broker_concurrency` - -* 导入报错:`failed to send batch` 或 `TabletWriter add batch with unknown id` - - 请参照 [导入手册](./load-manual.md) 中 **通用系统配置** 中 **BE 配置**,适当修改 `query_timeout` 和 `streaming_load_rpc_max_alive_time_sec`。 - -* 导入报错:`LOAD_RUN_FAIL; msg:Invalid Column Name:xxx` - - 如果是PARQUET或者ORC格式的数据,需要再文件头的列名与doris表中的列名一致,如 : - ``` - (tmp_c1,tmp_c2) - SET - ( - id=tmp_c2, - name=tmp_c1 - ) - ``` - 代表获取在parquet或orc中以(tmp_c1, tmp_c2)为列名的列,映射到doris表中的(id, name)列。如果没有设置set, 则以column中的列作为映射。 - - 注:如果使用某些hive版本直接生成的orc文件,orc文件中的表头并非hive meta数据,而是(_col0, _col1, _col2, ...), 可能导致Invalid Column Name错误,那么则需要使用set进行映射 - diff --git a/docs/zh-CN/administrator-guide/load-data/delete-manual.md b/docs/zh-CN/administrator-guide/load-data/delete-manual.md deleted file mode 100644 index 6bfdeab57e..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/delete-manual.md +++ /dev/null @@ -1,189 +0,0 @@ ---- -{ - "title": "Delete", - "language": "zh-CN" -} ---- - - - -# Delete - -Delete不同于其他导入方式,它是一个同步过程。和Insert into相似,所有的Delete操作在Doris中是一个独立的导入作业,一般Delete语句需要指定表和分区以及删除的条件来筛选要删除的数据,并将会同时删除base表和rollup表的数据。 - -## 语法 - -主要的Delete语法如下: - -``` -DELETE FROM table_name [PARTITION partition_name] -WHERE -column_name1 op value[ AND column_name2 op value ...]; -``` - -示例1: - -``` -DELETE FROM my_table PARTITION p1 WHERE k1 = 3; -``` - -示例2: - -``` -DELETE FROM my_table PARTITION p1 WHERE k1 < 3 AND k2 = "abc"; -``` - -下面介绍删除语句中使用到的参数: - -* PARTITION - - 
Delete语句的目标分区,若未指定,则此表必须为单分区表,否则无法delete - -* WHERE - - Delete语句的条件语句,所有删除语句都必须指定WHERE语句 - -说明: - -1. `Where`语句中的op的类型可包括`=, >, <, >=, <=, !=, in, not in`。 -2. `Where`语句中的列只能是`key`列 -3. 当选定的`key`列不存在某个 rollup 表内时,无法进行 delete -4. 条件语句中各个条件只能是`and`关系,如希望达成`or`可将条件分别写入两个 delete 语句中 -5. 如果指定表为 RANGE 或者 LIST 分区表,则必须指定 `PARTITION`。如果是单分区表,可以不指定。 -6. 不同于 Insert into 命令,delete 不能手动指定`label`,有关 label 的概念可以查看[Insert Into文档](./insert-into-manual.md) - -## 返回结果 - -Delete命令是一个SQL命令,返回结果是同步的,分为以下几种: - -1. 执行成功 - - 如果Delete顺利执行完成并可见,将返回下列结果,`Query OK`表示成功 - - ``` - mysql> delete from test_tbl PARTITION p1 where k1 = 1; - Query OK, 0 rows affected (0.04 sec) - {'label':'delete_e7830c72-eb14-4cb9-bbb6-eebd4511d251', 'status':'VISIBLE', 'txnId':'4005'} - ``` - -2. 提交成功,但未可见 - - Doris的事务提交分为两步:提交和发布版本,只有完成了发布版本步骤,结果才对用户是可见的。若已经提交成功了,那么就可以认为最终一定会发布成功,Doris会尝试在提交完后等待发布一段时间,如果超时后即使发布版本还未完成也会优先返回给用户,提示用户提交已经完成。若如果Delete已经提交并执行,但是仍未发布版本和可见,将返回下列结果 - - ``` - mysql> delete from test_tbl PARTITION p1 where k1 = 1; - Query OK, 0 rows affected (0.04 sec) - {'label':'delete_e7830c72-eb14-4cb9-bbb6-eebd4511d251', 'status':'COMMITTED', 'txnId':'4005', 'err':'delete job is committed but may be taking effect later' } - ``` - - 结果会同时返回一个json字符串: - - `affected rows`表示此次删除影响的行,由于Doris的删除目前是逻辑删除,因此对于这个值是恒为0。 - - `label`为自动生成的 label,是该导入作业的标识。每个导入作业,都有一个在单 database 内部唯一的 Label。 - - `status`表示数据删除是否可见,如果可见,显示`VISIBLE`,如果不可见,显示`COMMITTED`。 - - `txnId`为这个Delete job对应的事务id - - `err`字段会显示一些本次删除的详细信息 - -3. 提交失败,事务取消 - - 如果Delete语句没有提交成功,将会被Doris自动中止,返回下列结果 - - ``` - mysql> delete from test_tbl partition p1 where k1 > 80; - ERROR 1064 (HY000): errCode = 2, detailMessage = {错误原因} - ``` - - 示例: - - 比如说一个超时的删除,将会返回timeout时间和未完成的`(tablet=replica)` - - ``` - mysql> delete from test_tbl partition p1 where k1 > 80; - ERROR 1064 (HY000): errCode = 2, detailMessage = failed to delete replicas from job: 4005, Unfinished replicas:10000=60000, 10001=60000, 10002=60000 - ``` - - **综上,对于Delete操作返回结果的正确处理逻辑为:** - - 1. 如果返回结果为`ERROR 1064 (HY000)`,则表示删除失败 - - 2. 如果返回结果为`Query OK`,则表示删除执行成功 - - 1. 如果`status`为`COMMITTED`,表示数据仍不可见,用户可以稍等一段时间再用`show delete`命令查看结果 - 2. 如果`status`为`VISIBLE`,表示数据删除成功。 - -## 可配置项 - -### FE配置 - -**TIMEOUT配置** - -总体来说,Doris的删除作业的超时时间限制在30秒到5分钟时间内,具体时间可通过下面配置项调整 - -* `tablet_delete_timeout_second` - - delete自身的超时时间是可受指定分区下tablet的数量弹性改变的,此项配置为平均一个tablet所贡献的timeout时间,默认值为2。 - - 假设此次删除所指定分区下有5个tablet,那么可提供给delete的timeout时间为10秒,由于低于最低超时时间30秒,因此最终超时时间为30秒。 - -* `load_straggler_wait_second` - - 如果用户预估的数据量确实比较大,使得5分钟的上限不足时,用户可以通过此项调整timeout上限,默认值为300。 - - **TIMEOUT的具体计算规则为(秒)** - - `TIMEOUT = MIN(load_straggler_wait_second, MAX(30, tablet_delete_timeout_second * tablet_num))` - -* `query_timeout` - - 因为delete本身是一个SQL命令,因此删除语句也会受session限制,timeout还受Session中的`query_timeout`值影响,可以通过`SET query_timeout = xxx`来增加超时时间,单位是秒。 - -**IN谓词配置** - -* `max_allowed_in_element_num_of_delete` - - 如果用户在使用in谓词时需要占用的元素比较多,用户可以通过此项调整允许携带的元素上限,默认值为1024。 - -## 查看历史记录 - -1. 
用户可以通过show delete语句查看历史上已执行完成的删除记录 - - 语法 - - ``` - SHOW DELETE [FROM db_name] - ``` - - 示例 - - ``` - mysql> show delete from test_db; - +-----------+---------------+---------------------+-----------------+----------+ - | TableName | PartitionName | CreateTime | DeleteCondition | State | - +-----------+---------------+---------------------+-----------------+----------+ - | empty_tbl | p3 | 2020-04-15 23:09:35 | k1 EQ "1" | FINISHED | - | test_tbl | p4 | 2020-04-15 23:09:53 | k1 GT "80" | FINISHED | - +-----------+---------------+---------------------+-----------------+----------+ - 2 rows in set (0.00 sec) - ``` - diff --git a/docs/zh-CN/administrator-guide/load-data/insert-into-manual.md b/docs/zh-CN/administrator-guide/load-data/insert-into-manual.md deleted file mode 100644 index 9d66d1f07e..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/insert-into-manual.md +++ /dev/null @@ -1,310 +0,0 @@ ---- -{ - "title": "Insert Into", - "language": "zh-CN" -} ---- - - - -# Insert Into - -Insert Into 语句的使用方式和 MySQL 等数据库中 Insert Into 语句的使用方式类似。但在 Doris 中,所有的数据写入都是一个独立的导入作业。所以这里将 Insert Into 也作为一种导入方式介绍。 - -主要的 Insert Into 命令包含以下两种; - -* INSERT INTO tbl SELECT ... -* INSERT INTO tbl (col1, col2, ...) VALUES (1, 2, ...), (1,3, ...); - -其中第二种命令仅用于 Demo,不要使用在测试或生产环境中。 - -## 基本操作 - -### 创建导入 - -Insert Into 命令需要通过 MySQL 协议提交,创建导入请求会同步返回导入结果。 - -语法: - -``` -INSERT INTO table_name [partition_info] [WITH LABEL label] [col_list] [query_stmt] [VALUES]; -``` - -示例: - -``` -INSERT INTO tbl2 WITH LABEL label1 SELECT * FROM tbl3; -INSERT INTO tbl1 VALUES ("qweasdzxcqweasdzxc"), ("a"); -``` - -**注意** - -当需要使用 `CTE(Common Table Expressions)` 作为 insert 操作中的查询部分时,必须指定 `WITH LABEL` 和 column list 部分。示例 - -``` -INSERT INTO tbl1 WITH LABEL label1 -WITH cte1 AS (SELECT * FROM tbl1), cte2 AS (SELECT * FROM tbl2) -SELECT k1 FROM cte1 JOIN cte2 WHERE cte1.k1 = 1; - - -INSERT INTO tbl1 (k1) -WITH cte1 AS (SELECT * FROM tbl1), cte2 AS (SELECT * FROM tbl2) -SELECT k1 FROM cte1 JOIN cte2 WHERE cte1.k1 = 1; -``` - -下面主要介绍创建导入语句中使用到的参数: - -+ partition\_info - - 导入表的目标分区,如果指定目标分区,则只会导入符合目标分区的数据。如果没有指定,则默认值为这张表的所有分区。 - -+ col\_list - - 导入表的目标列,可以以任意的顺序存在。如果没有指定目标列,那么默认值是这张表的所有列。如果待表中的某个列没有存在目标列中,那么这个列需要有默认值,否则 Insert Into 就会执行失败。 - - 如果查询语句的结果列类型与目标列的类型不一致,那么会调用隐式类型转化,如果不能够进行转化,那么 Insert Into 语句会报语法解析错误。 - -+ query\_stmt - - 通过一个查询语句,将查询语句的结果导入到 Doris 系统中的其他表。查询语句支持任意 Doris 支持的 SQL 查询语法。 - -+ VALUES - - 用户可以通过 VALUES 语法插入一条或者多条数据。 - - *注意:VALUES 方式仅适用于导入几条数据作为导入 DEMO 的情况,完全不适用于任何测试和生产环境。Doris 系统本身也不适合单条数据导入的场景。建议使用 INSERT INTO SELECT 的方式进行批量导入。* - -* WITH LABEL - - INSERT 操作作为一个导入任务,也可以指定一个 label。如果不指定,则系统会自动指定一个 UUID 作为 label。 - - 该功能需要 0.11+ 版本。 - - *注意:建议指定 Label 而不是由系统自动分配。如果由系统自动分配,但在 Insert Into 语句执行过程中,因网络错误导致连接断开等,则无法得知 Insert Into 是否成功。而如果指定 Label,则可以再次通过 Label 查看任务结果。* - -### 导入结果 - -Insert Into 本身就是一个 SQL 命令,其返回结果会根据执行结果的不同,分为以下几种: - -1. 结果集为空 - - 如果 insert 对应 select 语句的结果集为空,则返回如下: - - ``` - mysql> insert into tbl1 select * from empty_tbl; - Query OK, 0 rows affected (0.02 sec) - ``` - - `Query OK` 表示执行成功。`0 rows affected` 表示没有数据被导入。 - -2. 结果集不为空 - - 在结果集不为空的情况下。返回结果分为如下几种情况: - - 1. 
Insert 执行成功并可见: - - ``` - mysql> insert into tbl1 select * from tbl2; - Query OK, 4 rows affected (0.38 sec) - {'label':'insert_8510c568-9eda-4173-9e36-6adc7d35291c', 'status':'visible', 'txnId':'4005'} - - mysql> insert into tbl1 with label my_label1 select * from tbl2; - Query OK, 4 rows affected (0.38 sec) - {'label':'my_label1', 'status':'visible', 'txnId':'4005'} - - mysql> insert into tbl1 select * from tbl2; - Query OK, 2 rows affected, 2 warnings (0.31 sec) - {'label':'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status':'visible', 'txnId':'4005'} - - mysql> insert into tbl1 select * from tbl2; - Query OK, 2 rows affected, 2 warnings (0.31 sec) - {'label':'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status':'committed', 'txnId':'4005'} - ``` - - `Query OK` 表示执行成功。`4 rows affected` 表示总共有4行数据被导入。`2 warnings` 表示被过滤的行数。 - - 同时会返回一个 json 串: - - ``` - {'label':'my_label1', 'status':'visible', 'txnId':'4005'} - {'label':'insert_f0747f0e-7a35-46e2-affa-13a235f4020d', 'status':'committed', 'txnId':'4005'} - {'label':'my_label1', 'status':'visible', 'txnId':'4005', 'err':'some other error'} - ``` - - `label` 为用户指定的 label 或自动生成的 label。Label 是该 Insert Into 导入作业的标识。每个导入作业,都有一个在单 database 内部唯一的 Label。 - - `status` 表示导入数据是否可见。如果可见,显示 `visible`,如果不可见,显示 `committed`。 - - `txnId` 为这个 insert 对应的导入事务的 id。 - - `err` 字段会显示一些其他非预期错误。 - - 当需要查看被过滤的行时,用户可以通过如下语句 - - ``` - show load where label="xxx"; - ``` - - 返回结果中的 URL 可以用于查询错误的数据,具体见后面 **查看错误行** 小结。 - - **数据不可见是一个临时状态,这批数据最终是一定可见的** - - 可以通过如下语句查看这批数据的可见状态: - - ``` - show transaction where id=4005; - ``` - - 返回结果中的 `TransactionStatus` 列如果为 `visible`,则表述数据可见。 - - 2. Insert 执行失败 - - 执行失败表示没有任何数据被成功导入,并返回如下: - - ``` - mysql> insert into tbl1 select * from tbl2 where k1 = "a"; - ERROR 1064 (HY000): all partitions have no load data. url: http://10.74.167.16:8042/api/_load_error_log?file=__shard_2/error_log_insert_stmt_ba8bb9e158e4879-ae8de8507c0bf8a2_ba8bb9e158e4879_ae8de8507c0bf8a2 - ``` - - 其中 `ERROR 1064 (HY000): all partitions have no load data` 显示失败原因。后面的 url 可以用于查询错误的数据,具体见后面 **查看错误行** 小结。 - -**综上,对于 insert 操作返回结果的正确处理逻辑应为:** - -1. 如果返回结果为 `ERROR 1064 (HY000)`,则表示导入失败。 -2. 如果返回结果为 `Query OK`,则表示执行成功。 - 1. 如果 `rows affected` 为 0,表示结果集为空,没有数据被导入。 - 2. 如果 `rows affected` 大于 0: - 1. 如果 `status` 为 `committed`,表示数据还不可见。需要通过 `show transaction` 语句查看状态直到 `visible` - 2. 如果 `status` 为 `visible`,表示数据导入成功。 - 3. 如果 `warnings` 大于 0,表示有数据被过滤,可以通过 `show load` 语句获取 url 查看被过滤的行。 - -### SHOW LAST INSERT - -在上一小节中我们介绍了如何根据 insert 操作的返回结果进行后续处理。但一些语言的mysql类库中很难获取返回结果的中的 json 字符串。因此,Doris 还提供了 `SHOW LAST INSERT` 命令来显式的获取最近一次 insert 操作的结果。 - -当执行完一个 insert 操作后,可以在同一 session 连接中执行 `SHOW LAST INSERT`。该命令会返回最近一次insert 操作的结果,如: - -``` -mysql> show last insert\G -*************************** 1. 
row *************************** - TransactionId: 64067 - Label: insert_ba8f33aea9544866-8ed77e2844d0cc9b - Database: default_cluster:db1 - Table: t1 -TransactionStatus: VISIBLE - LoadedRows: 2 - FilteredRows: 0 -``` - -该命令会返回 insert 以及对应事务的详细信息。因此,用户可以在每次执行完 insert 操作后,继续执行 `show last insert` 命令来获取 insert 的结果。 - -> 注意:该命令只会返回在同一 session 连接中,最近一次 insert 操作的结果。如果连接断开或更换了新的连接,则将返回空集。 - -## 相关系统配置 - -### FE 配置 - -+ timeout - - 导入任务的超时时间(以秒为单位),导入任务在设定的 timeout 时间内未完成则会被系统取消,变成 CANCELLED。 - - 目前 Insert Into 并不支持自定义导入的 timeout 时间,所有 Insert Into 导入的超时时间是统一的,默认的 timeout 时间为1小时。如果导入的源文件无法再规定时间内完成导入,则需要调整 FE 的参数```insert_load_default_timeout_second```。 - - 同时 Insert Into 语句收到 Session 变量 `query_timeout` 的限制。可以通过 `SET query_timeout = xxx;` 来增加超时时间,单位是秒。 - -### Session 变量 - -+ enable\_insert\_strict - - Insert Into 导入本身不能控制导入可容忍的错误率。用户只能通过 `enable_insert_strict` 这个 Session 参数用来控制。 - - 当该参数设置为 false 时,表示至少有一条数据被正确导入,则返回成功。如果有失败数据,则还会返回一个 Label。 - - 当该参数设置为 true 时,表示如果有一条数据错误,则导入失败。 - - 默认为 false。可通过 `SET enable_insert_strict = true;` 来设置。 - -+ query\_timeout - - Insert Into 本身也是一个 SQL 命令,因此 Insert Into 语句也受到 Session 变量 `query_timeout` 的限制。可以通过 `SET query_timeout = xxx;` 来增加超时时间,单位是秒。 - -## 最佳实践 - -### 应用场景 -1. 用户希望仅导入几条假数据,验证一下 Doris 系统的功能。此时适合使用 INSERT INTO VALUES 的语法。 -2. 用户希望将已经在 Doris 表中的数据进行 ETL 转换并导入到一个新的 Doris 表中,此时适合使用 INSERT INTO SELECT 语法。 -3. 用户可以创建一种外部表,如 MySQL 外部表映射一张 MySQL 系统中的表。或者创建 Broker 外部表来映射 HDFS 上的数据文件。然后通过 INSERT INTO SELECT 语法将外部表中的数据导入到 Doris 表中存储。 - -### 数据量 -Insert Into 对数据量没有限制,大数据量导入也可以支持。但 Insert Into 有默认的超时时间,用户预估的导入数据量过大,就需要修改系统的 Insert Into 导入超时时间。 - -``` -导入数据量 = 36G 约≤ 3600s * 10M/s -其中 10M/s 是最大导入限速,用户需要根据当前集群情况计算出平均的导入速度来替换公式中的 10M/s -``` - -### 完整例子 - -用户有一张表 store\_sales 在数据库 sales 中,用户又创建了一张表叫 bj\_store\_sales 也在数据库 sales 中,用户希望将 store\_sales 中销售记录在 bj 的数据导入到这张新建的表 bj\_store\_sales 中。导入的数据量约为:10G。 - -``` -store_sales schema: -(id, total, user_id, sale_timestamp, region) - -bj_store_sales schema: -(id, total, user_id, sale_timestamp) - -``` - -集群情况:用户当前集群的平均导入速度约为 5M/s - -+ Step1: 判断是否要修改 Insert Into 的默认超时时间 - - ``` - 计算导入的大概时间 - 10G / 5M/s = 2000s - - 修改 FE 配置 - insert_load_default_timeout_second = 2000 - ``` - -+ Step2:创建导入任务 - - 由于用户是希望将一张表中的数据做 ETL 并导入到目标表中,所以应该使用 Insert into query\_stmt 方式导入。 - - ``` - INSERT INTO bj_store_sales WITH LABEL `label` SELECT id, total, user_id, sale_timestamp FROM store_sales where region = "bj"; - ``` - -## 常见问题 - -* 查看错误行 - - 由于 Insert Into 无法控制错误率,只能通过 `enable_insert_strict` 设置为完全容忍错误数据或完全忽略错误数据。因此如果 `enable_insert_strict` 设为 true,则 Insert Into 可能会失败。而如果 `enable_insert_strict` 设为 false,则可能出现仅导入了部分合格数据的情况。 - - 当返回结果中提供了 url 字段时,可以通过以下命令查看错误行: - - ```SHOW LOAD WARNINGS ON "url";``` - - 示例: - - ```SHOW LOAD WARNINGS ON "http://ip:port/api/_load_error_log?file=__shard_13/error_log_insert_stmt_d2cac0a0a16d482d-9041c949a4b71605_d2cac0a0a16d482d_9041c949a4b71605";``` - - 错误的原因通常如:源数据列长度超过目的数据列长度、列类型不匹配、分区不匹配、列顺序不匹配等等。 diff --git a/docs/zh-CN/administrator-guide/load-data/load-json-format.md b/docs/zh-CN/administrator-guide/load-data/load-json-format.md deleted file mode 100644 index b7e9451b3e..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/load-json-format.md +++ /dev/null @@ -1,470 +0,0 @@ ---- -{ - "title": "导入 Json 格式数据", - "language": "zh-CN" -} ---- - - - -# 导入 Json 格式数据 - -Doris 从 0.12 版本开始支持 Json 格式的数据导入。 - -## 支持的导入方式 - -目前只有以下导入方式支持 Json 格式的数据导入: - -* Stream Load -* Routine Load - -关于以上导入方式的具体说明,请参阅相关文档。本文档主要介绍在这些导入方式中关于 Json 部分的使用说明。 - -## 支持的 Json 格式 - -当前前仅支持以下两种 Json 格式: - -1. 
以 Array 表示的多行数据 - - 以 Array 为根节点的 Json 格式。Array 中的每个元素表示要导入的一行数据,通常是一个 Object。示例如下: - - ``` - [ - { "id": 123, "city" : "beijing"}, - { "id": 456, "city" : "shanghai"}, - ... - ] - ``` - - ``` - [ - { "id": 123, "city" : { "name" : "beijing", "region" : "haidian"}}, - { "id": 456, "city" : { "name" : "beijing", "region" : "chaoyang"}}, - ... - ] - ``` - - 这种方式通常用于 Stream Load 导入方式,以便在一批导入数据中表示多行数据。 - - 这种方式必须配合设置 `strip_outer_array=true` 使用。Doris在解析时会将数组展开,然后依次解析其中的每一个 Object 作为一行数据。 - -2. 以 Object 表示的单行数据 - - 以 Object 为根节点的 Json 格式。整个 Object 即表示要导入的一行数据。示例如下: - - ``` - { "id": 123, "city" : "beijing"} - ``` - - ``` - { "id": 123, "city" : { "name" : "beijing", "region" : "haidian" }} - ``` - - 这种方式通常用于 Routine Load 导入方式,如表示 Kafka 中的一条消息,即一行数据。 - -## Json Path - -Doris 支持通过 Json Path 抽取 Json 中指定的数据。 - -**注:因为对于 Array 类型的数据,Doris 会先进行数组展开,最终按照 Object 格式进行单行处理。所以本文档之后的示例都以单个 Object 格式的 Json 数据进行说明。** - -* 不指定 Json Path - - 如果没有指定 Json Path,则 Doris 会默认使用表中的列名查找 Object 中的元素。示例如下: - - 表中包含两列: `id`, `city` - - Json 数据如下: - - ``` - { "id": 123, "city" : "beijing"} - ``` - - 则 Doris 会使用 `id`, `city` 进行匹配,得到最终数据 `123` 和 `beijing`。 - - 如果 Json 数据如下: - - ``` - { "id": 123, "name" : "beijing"} - ``` - - 则使用 `id`, `city` 进行匹配,得到最终数据 `123` 和 `null`。 - -* 指定 Json Path - - 通过一个 Json 数据的形式指定一组 Json Path。数组中的每个元素表示一个要抽取的列。示例如下: - - ``` - ["$.id", "$.name"] - ``` - ``` - ["$.id.sub_id", "$.name[0]", "$.city[0]"] - ``` - - Doris 会使用指定的 Json Path 进行数据匹配和抽取。 - -* 匹配非基本类型 - - 前面的示例最终匹配到的数值都是基本类型,如整型、字符串等。Doris 当前暂不支持复合类型,如 Array、Map 等。所以当匹配到一个非基本类型时,Doris 会将该类型转换为 Json 格式的字符串,并以字符串类型进行导入。示例如下: - - Json 数据为: - - ``` - { "id": 123, "city" : { "name" : "beijing", "region" : "haidian" }} - ``` - - Json Path 为 `["$.city"]`。则匹配到的元素为: - - ``` - { "name" : "beijing", "region" : "haidian" } - ``` - - 该元素会被转换为字符串进行后续导入操作: - - ``` - "{'name':'beijing','region':'haidian'}" - ``` - -* 匹配失败 - - 当匹配失败时,将会返回 `null`。示例如下: - - Json 数据为: - - ``` - { "id": 123, "name" : "beijing"} - ``` - - Json Path 为 `["$.id", "$.info"]`。则匹配到的元素为 `123` 和 `null`。 - - Doris 当前不区分 Json 数据中表示的 null 值,和匹配失败时产生的 null 值。假设 Json 数据为: - - ``` - { "id": 123, "name" : null } - ``` - - 则使用以下两种 Json Path 会获得相同的结果:`123` 和 `null`。 - - ``` - ["$.id", "$.name"] - ``` - ``` - ["$.id", "$.info"] - ``` - -* 完全匹配失败 - - 为防止一些参数设置错误导致的误操作。Doris 在尝试匹配一行数据时,如果所有列都匹配失败,则会认为这个是一个错误行。假设 Json 数据为: - - ``` - { "id": 123, "city" : "beijing" } - ``` - - 如果 Json Path 错误的写为(或者不指定 Json Path 时,表中的列不包含 `id` 和 `city`): - - ``` - ["$.ad", "$.infa"] - ``` - - 则会导致完全匹配失败,则该行会标记为错误行,而不是产出 `null, null`。 - -## Json Path 和 Columns - -Json Path 用于指定如何对 JSON 格式中的数据进行抽取,而 Columns 指定列的映射和转换关系。两者可以配合使用。 - -换句话说,相当于通过 Json Path,将一个 Json 格式的数据,按照 Json Path 中指定的列顺序进行了列的重排。之后,可以通过 Columns,将这个重排后的源数据和表的列进行映射。举例如下: - -数据内容: - -``` -{"k1" : 1, "k2": 2} -``` - -表结构: - -`k2 int, k1 int` - -导入语句1(以 Stream Load 为例): - -``` -curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load -``` - -导入语句1中,仅指定了 Json Path,没有指定 Columns。其中 Json Path 的作用是将 Json 数据按照 Json Path 中字段的顺序进行抽取,之后会按照表结构的顺序进行写入。最终导入的数据结果如下: - -``` -+------+------+ -| k1 | k2 | -+------+------+ -| 2 | 1 | -+------+------+ -``` - -会看到,实际的 k1 列导入了 Json 数据中的 "k2" 列的值。这是因为,Json 中字段名称并不等同于表结构中字段的名称。我们需要显式的指定这两者之间的映射关系。 - -导入语句2: - -``` -curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -H "columns: k2, k1" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load -``` - -相比如导入语句1,这里增加了 Columns 字段,用于描述列的映射关系,按 
`k2, k1` 的顺序。即按Json Path 中字段的顺序抽取后,指定第一列为表中 k2 列的值,而第二列为表中 k1 列的值。最终导入的数据结果如下: - -``` -+------+------+ -| k1 | k2 | -+------+------+ -| 1 | 2 | -+------+------+ -``` - -当然,如其他导入一样,可以在 Columns 中进行列的转换操作。示例如下: - -``` -curl -v --location-trusted -u root: -H "format: json" -H "jsonpaths: [\"$.k2\", \"$.k1\"]" -H "columns: k2, tmp_k1, k1 = tmp_k1 * 100" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load -``` - -上述示例会将 k1 的值乘以 100 后导入。最终导入的数据结果如下: - -``` -+------+------+ -| k1 | k2 | -+------+------+ -| 100 | 2 | -+------+------+ -``` - -## NULL 和 Default 值 - -示例数据如下: - -``` -[ - {"k1": 1, "k2": "a"}, - {"k1": 2}, - {"k1": 3, "k2": "c"}, -] -``` - -表结构为:`k1 int null, k2 varchar(32) null default "x"` - -导入语句如下: - -``` -curl -v --location-trusted -u root: -H "format: json" -H "strip_outer_array: true" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load -``` - -用户可能期望的导入结果如下,即对于缺失的列,填写默认值。 - -``` -+------+------+ -| k1 | k2 | -+------+------+ -| 1 | a | -+------+------+ -| 2 | x | -+------+------+ -| 3 | c | -+------+------+ -``` - -但实际的导入结果如下,即对于缺失的列,补上了 NULL。 - -``` -+------+------+ -| k1 | k2 | -+------+------+ -| 1 | a | -+------+------+ -| 2 | NULL | -+------+------+ -| 3 | c | -+------+------+ -``` - -这是因为通过导入语句中的信息,Doris 并不知道 “缺失的列是表中的 k2 列”。 -如果要对以上数据按照期望结果导入,则导入语句如下: - -``` -curl -v --location-trusted -u root: -H "format: json" -H "strip_outer_array: true" -H "jsonpaths: [\"$.k1\", \"$.k2\"]" -H "columns: k1, tmp_k2, k2 = ifnull(tmp_k2, 'x')" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load -``` - -## LargetInt与Decimal - -Doris支持LargeInt与Decimal等数据范围更大,数据精度更高的数据类型。但是由于Doris使用的Rapid Json库对于数字类型能够解析的最大范围为Int64与Double,这导致了通过Json导入LargeInt或Decimal时可能会出现:精度丢失,数据转换出错等问题。 - -示例数据如下: - -``` -[ - {"k1": 1, "k2":9999999999999.999999 } -] -``` - - -导入k2列类型为`Decimal(16, 9)`,数据为:`9999999999999.999999`。在进行Json导入时,由于Double转换的精度丢失导致了导入的数据为:`10000000000000.0002`,引发了导入出错。 - -为了解决这个问题,Doris在导入时提供了 `num_as_string`的开关。Doris在解析Json数据时会将数字类型转为字符串,然后在确保不会出现精度丢失的情况下进行导入。 - -``` -curl -v --location-trusted -u root: -H "format: json" -H "num_as_string: true" -T example.json http://127.0.0.1:8030/api/db1/tbl1/_stream_load -``` - -但是开启这个开关会引起一些意想不到的副作用。Doris 当前暂不支持复合类型,如 Array、Map 等。所以当匹配到一个非基本类型时,Doris 会将该类型转换为 Json 格式的字符串,而`num_as_string`会同样将复合类型的数字转换为字符串,举个例子: - -Json 数据为: - - { "id": 123, "city" : { "name" : "beijing", "city_id" : 1 }} - -不开启`num_as_string`时,导入的city列的数据为: - -`{ "name" : "beijing", "city_id" : 1 }` - -而开启了`num_as_string`时,导入的city列的数据为: - -`{ "name" : "beijing", "city_id" : "1" }` - -注意,这里导致了复合类型原先为1的数字类型的city_id被作为字符串列处理并添加上了引号,与原始数据相比,产生了变化。 - -所以用在使用Json导入时,要尽量避免LargeInt与Decimal与复合类型的同时导入。如果无法避免,则需要充分了解开启`num_as_string`后对复合类型导入的**副作用**。 - - -## 应用示例 - -### Stream Load - -因为 Json 格式的不可拆分特性,所以在使用 Stream Load 导入 Json 格式的文件时,文件内容会被全部加载到内存后,才开始处理。因此,如果文件过大的话,可能会占用较多的内存。 - -假设表结构为: - -``` -id INT NOT NULL, -city VARHCAR NULL, -code INT NULL -``` - -1. 导入单行数据1 - - ``` - {"id": 100, "city": "beijing", "code" : 1} - ``` - - * 不指定 Json Path - - ``` - curl --location-trusted -u user:passwd -H "format: json" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load - ``` - - 导入结果: - - ``` - 100 beijing 1 - ``` - - * 指定 Json Path - - ``` - curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load - ``` - - 导入结果: - - ``` - 100 beijing 1 - ``` - -2. 
导入单行数据2 - - ``` - {"id": 100, "content": {"city": "beijing", "code" : 1}} - ``` - - * 指定 Json Path - - ``` - curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.content.city\",\"$.content.code\"]" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load - ``` - - 导入结果: - - ``` - 100 beijing 1 - ``` - -3. 导入多行数据 - - ``` - [ - {"id": 100, "city": "beijing", "code" : 1}, - {"id": 101, "city": "shanghai"}, - {"id": 102, "city": "tianjin", "code" : 3}, - {"id": 103, "city": "chongqing", "code" : 4}, - {"id": 104, "city": ["zhejiang", "guangzhou"], "code" : 5}, - { - "id": 105, - "city": { - "order1": ["guangzhou"] - }, - "code" : 6 - } - ] - ``` - - * 指定 Json Path - - ``` - curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -H "strip_outer_array: true" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load - ``` - - 导入结果: - - ``` - 100 beijing 1 - 101 shanghai NULL - 102 tianjin 3 - 103 chongqing 4 - 104 ["zhejiang","guangzhou"] 5 - 105 {"order1":["guangzhou"]} 6 - ``` - -4. 对导入数据进行转换 - - 数据依然是示例3中的多行数据,现需要对导入数据中的 `code` 列加1后导入。 - - ``` - curl --location-trusted -u user:passwd -H "format: json" -H "jsonpaths: [\"$.id\",\"$.city\",\"$.code\"]" -H "strip_outer_array: true" -H "columns: id, city, tmpc, code=tmpc+1" -T data.json http://localhost:8030/api/db1/tbl1/_stream_load - ``` - - 导入结果: - - ``` - 100 beijing 2 - 101 shanghai NULL - 102 tianjin 4 - 103 chongqing 5 - 104 ["zhejiang","guangzhou"] 6 - 105 {"order1":["guangzhou"]} 7 - ``` - -### Routine Load - -Routine Load 对 Json 数据的处理原理和 Stream Load 相同。在此不再赘述。 - -对于 Kafka 数据源,每个 Massage 中的内容被视作一个完整的 Json 数据。如果一个 Massage 中是以 Array 格式的表示的多行数据,则会导入多行,而 Kafka 的 offset 只会增加 1。而如果一个 Array 格式的 Json 表示多行数据,但是因为 Json 格式错误导致解析 Json 失败,则错误行只会增加 1(因为解析失败,实际上 Doris 无法判断其中包含多少行数据,只能按一行错误数据记录)。 diff --git a/docs/zh-CN/administrator-guide/load-data/load-manual.md b/docs/zh-CN/administrator-guide/load-data/load-manual.md deleted file mode 100644 index 2b82075f35..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/load-manual.md +++ /dev/null @@ -1,227 +0,0 @@ ---- -{ - "title": "导入总览", - "language": "zh-CN" -} ---- - - - -# 导入总览 - -导入(Load)功能就是将用户的原始数据导入到 Doris 中。导入成功后,用户即可通过 Mysql 客户端查询数据。 - -Doris 支持多种导入方式。建议先完整阅读本文档,再根据所选择的导入方式,查看各自导入方式的详细文档。 - -## 基本概念 - -1. Frontend(FE):Doris 系统的元数据和调度节点。在导入流程中主要负责导入规划生成和导入任务的调度工作。 -2. Backend(BE):Doris 系统的计算和存储节点。在导入流程中主要负责数据的 ETL 和存储。 -3. Broker:Broker 为一个独立的无状态进程。封装了文件系统接口,提供 Doris 读取远端存储系统中文件的能力。 -4. 导入作业(Load job):导入作业读取用户提交的源数据,转换或清洗后,将数据导入到 Doris 系统中。导入完成后,数据即可被用户查询到。 -5. Label:所有导入作业都有一个 Label。Label 在一个数据库内唯一,可由用户指定或系统自动生成,用于标识一个导入作业。相同的 Label 仅可用于一个成功的导入作业。 -6. 
MySQL 协议/HTTP 协议:Doris 提供两种访问协议接口。 MySQL 协议和 HTTP 协议。部分导入方式使用 MySQL 协议接口提交作业,部分导入方式使用 HTTP 协议接口提交作业。 - -## 导入方式 - -为适配不同的数据导入需求,Doris 系统提供了6种不同的导入方式。每种导入方式支持不同的数据源,存在不同的使用方式(异步,同步)。 - -所有导入方式都支持 csv 数据格式。其中 Broker load 还支持 parquet 和 orc 数据格式。 - -每个导入方式的说明请参阅单个导入方式的操作手册。 - -* Broker load - - 通过 Broker 进程访问并读取外部数据源(如 HDFS)导入到 Doris。用户通过 Mysql 协议提交导入作业后,异步执行。通过 `SHOW LOAD` 命令查看导入结果。 - -* Stream load - - 用户通过 HTTP 协议提交请求并携带原始数据创建导入。主要用于快速将本地文件或数据流中的数据导入到 Doris。导入命令同步返回导入结果。 - -* Insert - - 类似 MySQL 中的 Insert 语句,Doris 提供 `INSERT INTO tbl SELECT ...;` 的方式从 Doris 的表中读取数据并导入到另一张表。或者通过 `INSERT INTO tbl VALUES(...);` 插入单条数据。 - -* Multi load - - 用户通过 HTTP 协议提交多个导入作业。Multi Load 可以保证多个导入作业的原子生效。 - -* Routine load - - 用户通过 MySQL 协议提交例行导入作业,生成一个常驻线程,不间断的从数据源(如 Kafka)中读取数据并导入到 Doris 中。 - -* 通过S3协议直接导入 - - 用户通过S3协议直接导入数据,用法和Broker Load 类似 - -## 基本原理 - -### 导入执行流程 - -``` -+---------+ +---------+ +----------+ +-----------+ -| | | | | | | | -| PENDING +----->+ ETL +----->+ LOADING +----->+ FINISHED | -| | | | | | | | -+---------+ +---+-----+ +----+-----+ +-----------+ - | | | - | | | - | | | - | | | +-----------+ - | | | | | - +---------------+-----------------+------------> CANCELLED | - | | - +-----------+ - -``` - -如上图,一个导入作业主要经过上面4个阶段。 - -+ PENDING(非必须): 该阶段只有 Broker Load 才有。Broker Load 被用户提交后会短暂停留在这个阶段,直到被 FE 中的 Scheduler 调度。 其中 Scheduler 的调度间隔为5秒。 - -+ ETL(非必须): 该阶段在版本 0.10.0(包含) 之前存在,主要是用于将原始数据按照用户声明的方式进行变换,并且过滤不满足条件的原始数据。在 0.10.0 后的版本,ETL 阶段不再存在,其中数据 transform 的工作被合并到 LOADING 阶段。 - -+ LOADING: 该阶段在版本 0.10.0(包含)之前主要用于将变换后的数据推到对应的 BE 存储中。在 0.10.0 后的版本,该阶段先对数据进行清洗和变换,然后将数据发送到 BE 存储中。当所有导入数据均完成导入后,进入等待生效过程,此时 Load job 依旧是 LOADING。 - -+ FINISHED: 在 Load job 涉及的所有数据均生效后,Load job 的状态变成 FINISHED。FINISHED 后导入的数据均可查询。 - -+ CANCELLED: 在作业 FINISHED 之前,作业都可能被取消并进入 CANCELLED 状态。如用户手动取消,或导入出现错误等。CANCELLED 也是 Load Job 的最终状态,不可被再次执行。 - -上述阶段,除了 PENDING 到 LOADING 阶段是 Scheduler 轮询调度的,其他阶段之前的转移都是回调机制实现。 - -### Label 和 原子性 - -Doris 对所有导入方式提供原子性保证。即保证同一个导入作业内的数据,原子生效。不会出现仅导入部分数据的情况。 - -同时,每一个导入作业都有一个由用户指定或者系统自动生成的 Label。Label 在一个 Database 内唯一。当一个 Label 对应的导入作业成功后,不可再重复使用该 Label 提交导入作业。如果 Label 对应的导入作业失败,则可以重复使用。 - -用户可以通过 Label 机制,来保证 Label 对应的数据最多被导入一次,即At-Most-Once 语义。 - -## 同步和异步 - -Doris 目前的导入方式分为两类,同步和异步。如果是外部程序接入 Doris 的导入功能,需要判断使用导入方式是哪类再确定接入逻辑。 - -### 同步 - -同步导入方式即用户创建导入任务,Doris 同步执行导入,执行完成后返回用户导入结果。用户可直接根据创建导入任务命令返回的结果同步判断导入是否成功。 - -同步类型的导入方式有: **Stream load**,**Insert**。 - -操作步骤: - -1. 用户(外部系统)创建导入任务。 -2. Doris 返回导入结果。 -3. 用户(外部系统)判断导入结果,如果失败可以再次提交导入任务。 - -*注意:如果用户使用的导入方式是同步返回的,且导入的数据量过大,则创建导入请求可能会花很长时间才能返回结果。* - -### 异步 -异步导入方式即用户创建导入任务后,Doris 直接返回创建成功。**创建成功不代表数据已经导入**。导入任务会被异步执行,用户在创建成功后,需要通过轮询的方式发送查看命令查看导入作业的状态。如果创建失败,则可以根据失败信息,判断是否需要再次创建。 - -异步类型的导入方式有:**Broker load**,**Multi load**。 - -操作步骤: - -1. 用户(外部系统)创建导入任务。 -2. Doris 返回导入创建结果。 -3. 用户(外部系统)判断导入创建结果,成功则进入4,失败回到重试创建导入,回到1。 -4. 用户(外部系统)轮询查看导入任务,直到状态变为 FINISHED 或 CANCELLED。 - -### 注意事项 -无论是异步还是同步的导入类型,都不应该在 Doris 返回导入失败或导入创建失败后,无休止的重试。**外部系统在有限次数重试并失败后,保留失败信息,大部分多次重试均失败问题都是使用方法问题或数据本身问题。** - -## 内存限制 - -用户可以通过设置参数来限制单个导入的内存使用,以防止导入占用过多的内存而导致系统OOM。 -不同导入方式限制内存的方式略有不同,可以参阅各自的导入手册查看。 - -一个导入作业通常会分布在多个 Backend 上执行,导入内存限制的是一个导入作业,在单个 Backend 上的内存使用,而不是在整个集群的内存使用。 - -同时,每个 Backend 会设置可用于导入的内存的总体上限。具体配置参阅下面的通用系统配置小节。这个配置限制了所有在该 Backend 上运行的导入任务的总体内存使用上限。 - -较小的内存限制可能会影响导入效率,因为导入流程可能会因为内存达到上限而频繁的将内存中的数据写回磁盘。而过大的内存限制可能导致当导入并发较高时,系统OOM。所以,需要根据需求,合理的设置导入的内存限制。 - -## 最佳实践 - -用户在接入 Doris 导入时,一般会采用程序接入的方式,这样可以保证数据被定期的导入到 Doris 中。下面主要说明了程序接入 Doris 的最佳实践。 - -1. 选择合适的导入方式:根据数据源所在位置选择导入方式。例如:如果原始数据存放在 HDFS 上,则使用 Broker load 导入。 -2. 
确定导入方式的协议:如果选择了 Broker load 导入方式,则外部系统需要能使用 MySQL 协议定期提交和查看导入作业。 -3. 确定导入方式的类型:导入方式为同步或异步。比如 Broker load 为异步导入方式,则外部系统在提交创建导入后,必须调用查看导入命令,根据查看导入命令的结果来判断导入是否成功。 -4. 制定 Label 生成策略:Label 生成策略需满足,每一批次数据唯一且固定的原则。这样 Doris 就可以保证 At-Most-Once。 -5. 程序自身保证 At-Least-Once:外部系统需要保证自身的 At-Least-Once,这样就可以保证导入流程的 Exactly-Once。 - -## 通用系统配置 - -下面主要解释了几个所有导入方式均通用的系统级别的配置。 - -### FE 配置 - -以下配置属于 FE 的系统配置,可以通过修改 FE 的配置文件 ```fe.conf``` 来修改配置。 - -+ max\_load\_timeout\_second 和 min\_load\_timeout\_second - - 这两个配置含义为:最大的导入超时时间,最小的导入超时时间,以秒为单位。默认的最大超时时间为3天, 默认的最小超时时间为1秒。用户自定义的导入超时时间不可超过这个范围。该参数通用于所有的导入方式。 - -+ desired\_max\_waiting\_jobs - - 在等待队列中的导入任务个数最大值,默认为100。当在 FE 中处于 PENDING 状态(也就是等待执行的)导入个数超过该值,新的导入请求则会被拒绝。 - - 此配置仅对异步执行的导入有效,当异步执行的导入等待个数超过默认值,则后续的创建导入请求会被拒绝。 - -+ max\_running\_txn\_num\_per\_db - - 这个配置的含义是说,每个 Database 中正在运行的导入最大个数(不区分导入类型,统一计数)。默认的最大导入并发为 100。当当前 Database 正在运行的导入个数超过最大值时,后续的导入不会被执行。如果是同步导入作业,则导入会被拒绝。如果是异步导入作业。则作业会在队列中等待。 - -### BE 配置 - -以下配置属于 BE 的系统配置,可以通过修改 BE 的配置文件 ```be.conf``` 来修改配置。 - -+ push\_write\_mbytes\_per\_sec - - BE 上单个 Tablet 的写入速度限制。默认是 10,即 10MB/s。通常 BE 对单个 Tablet 的最大写入速度,根据 Schema 以及系统的不同,大约在 10-30MB/s 之间。可以适当调整这个参数来控制导入速度。 - -+ write\_buffer\_size - - 导入数据在 BE 上会先写入一个 memtable,memtable 达到阈值后才会写回磁盘。默认大小是 100MB。过小的阈值可能导致 BE 上存在大量的小文件。可以适当提高这个阈值减少文件数量。但过大的阈值可能导致 RPC 超时,见下面的配置说明。 - -+ tablet\_writer\_rpc\_timeout\_sec - - 导入过程中,发送一个 Batch(1024行)的 RPC 超时时间。默认 600 秒。因为该 RPC 可能涉及多个 memtable 的写盘操作,所以可能会因为写盘导致 RPC 超时,可以适当调整这个超时时间来减少超时错误(如 `send batch fail` 错误)。同时,如果调大 `write_buffer_size` 配置,也需要适当调大这个参数。 - -+ streaming\_load\_rpc\_max\_alive\_time\_sec - - 在导入过程中,Doris 会为每一个 Tablet 开启一个 Writer,用于接收数据并写入。这个参数指定了 Writer 的等待超时时间。如果在这个时间内,Writer 没有收到任何数据,则 Writer 会被自动销毁。当系统处理速度较慢时,Writer 可能长时间接收不到下一批数据,导致导入报错:`TabletWriter add batch with unknown id`。此时可适当增大这个配置。默认为 600 秒。 - -* load\_process\_max\_memory\_limit\_bytes 和 load\_process\_max\_memory\_limit\_percent - - 这两个参数,限制了单个 Backend 上,可用于导入任务的内存上限。分别是最大内存和最大内存百分比。`load_process_max_memory_limit_percent` 默认为 80,表示对 Backend 总内存限制的百分比(总内存限制 `mem_limit` 默认为 80%,表示对物理内存的百分比)。即假设物理内存为 M,则默认导入内存限制为 M * 80% * 80%。 - - `load_process_max_memory_limit_bytes` 默认为 100GB。系统会在两个参数中取较小者,作为最终的 Backend 导入内存使用上限。 - -+ label\_keep\_max\_second - - 设置导入任务记录保留时间。已经完成的( FINISHED or CANCELLED )导入任务记录会保留在 Doris 系统中一段时间,时间由此参数决定。参数默认值时间为3天。该参数通用与所有类型的导入任务。 - -### 列映射 - 假设导入数据有为 `1,2,3`,表有 `c1,c2,c3` 三列,如果数据直接导入表中可以使用如下语句 `COLUMNS(c1,c2,c3)` 此语句等价于 `COLUMNS(tmp_c1,tmp_c2,tmp_c3,c1=tmp_c1,c2=tmp_c2,c3=tmp_c3)` -如果想再导入数据时执行变换或者使用临时变量,则变换或者临时变量一定要按照使用的顺序指定, 例如 `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = tmp_c1 +1, c2= c1+1, c3 =c2+1)`, 这样的语句等价于 `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = tmp_c1 +1, c2= tmp_c1 +1+1, c3 =tmp_c1 +1+1+1)` -在使用某个表达式时这个表达式一定要在前面定义,例如如下语句则不合法 `COLUMNS(tmp_c1,tmp_c2,tmp_c3, c1 = c1+1, c2 = temp + 1, temp = tmp_c1 +1, c3 =c2+1)` - diff --git a/docs/zh-CN/administrator-guide/load-data/routine-load-manual.md b/docs/zh-CN/administrator-guide/load-data/routine-load-manual.md deleted file mode 100644 index 4a68aef24c..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/routine-load-manual.md +++ /dev/null @@ -1,335 +0,0 @@ ---- -{ - "title": "Routine Load", - "language": "zh-CN" -} ---- - - - -# Routine Load - -例行导入(Routine Load)功能为用户提供了一种自动从指定数据源进行数据导入的功能。 - -本文档主要介绍该功能的实现原理、使用方式以及最佳实践。 - -## 名词解释 - -* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。 -* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。 -* RoutineLoadJob:用户提交的一个例行导入作业。 -* JobScheduler:例行导入作业调度器,用于调度和拆分一个 RoutineLoadJob 为多个 Task。 -* Task:RoutineLoadJob 被 JobScheduler 根据规则拆分的子任务。 -* TaskScheduler:任务调度器。用于调度 
Task 的执行。 - -## 原理 - -``` - +---------+ - | Client | - +----+----+ - | -+-----------------------------+ -| FE | | -| +-----------v------------+ | -| | | | -| | Routine Load Job | | -| | | | -| +---+--------+--------+--+ | -| | | | | -| +---v--+ +---v--+ +---v--+ | -| | task | | task | | task | | -| +--+---+ +---+--+ +---+--+ | -| | | | | -+-----------------------------+ - | | | - v v v - +---+--+ +--+---+ ++-----+ - | BE | | BE | | BE | - +------+ +------+ +------+ - -``` - -如上图,Client 向 FE 提交一个例行导入作业。 - -FE 通过 JobScheduler 将一个导入作业拆分成若干个 Task。每个 Task 负责导入指定的一部分数据。Task 被 TaskScheduler 分配到指定的 BE 上执行。 - -在 BE 上,一个 Task 被视为一个普通的导入任务,通过 Stream Load 的导入机制进行导入。导入完成后,向 FE 汇报。 - -FE 中的 JobScheduler 根据汇报结果,继续生成后续新的 Task,或者对失败的 Task 进行重试。 - -整个例行导入作业通过不断的产生新的 Task,来完成数据不间断的导入。 - -## Kafka 例行导入 - -当前我们仅支持从 Kafka 系统进行例行导入。该部分会详细介绍 Kafka 例行导入使用方式和最佳实践。 - -### 使用限制 - -1. 支持无认证的 Kafka 访问,以及通过 SSL 方式认证的 Kafka 集群。 -2. 支持的消息格式为 csv, json 文本格式。csv 每一个 message 为一行,且行尾**不包含**换行符。 -3. 默认支持 Kafka 0.10.0 (含) 以上版本。如果要使用 Kafka 0.10.0 以下版本 (0.9.0, 0.8.2, 0.8.1, 0.8.0),需要修改 be 的配置,将 kafka_broker_version_fallback 的值设置为要兼容的旧版本,或者在创建routine load的时候直接设置 property.broker.version.fallback的值为要兼容的旧版本,使用旧版本的代价是routine load 的部分新特性可能无法使用,如根据时间设置 kafka 分区的 offset。 - -### 创建例行导入任务 - -创建例行导入任务的的详细语法可以连接到 Doris 后,执行 `HELP ROUTINE LOAD;` 查看语法帮助。这里主要详细介绍,创建作业时的注意事项。 - -* columns_mapping - - `columns_mapping` 主要用于指定表结构和 message 中的列映射关系,以及一些列的转换。如果不指定,Doris 会默认 message 中的列和表结构的列按顺序一一对应。虽然在正常情况下,如果源数据正好一一对应,则不指定也可以进行正常的数据导入。但是我们依然强烈建议用户**显式的指定列映射关系**。这样当表结构发生变化(比如增加一个 nullable 的列),或者源文件发生变化(比如增加了一列)时,导入任务依然可以继续进行。否则,当发生上述变动后,因为列映射关系不再一一对应,导入将报错。 - - 在 `columns_mapping` 中我们同样可以使用一些内置函数进行列的转换。但需要注意函数参数对应的实际列类型。举例说明: - - 假设用户需要导入只包含 `k1` 一列的表,列类型为 `int`。并且需要将源文件中的 null 值转换为 0。该功能可以通过 `ifnull` 函数实现。正确的使用方式如下: - - `COLUMNS (xx, k1=ifnull(xx, "0"))` - - 注意这里我们使用 `"0"` 而不是 `0`,虽然 `k1` 的类型为 `int`。因为对于导入任务来说,源数据中的列类型都为 `varchar`,所以这里 `xx` 虚拟列的类型也为 `varchar`。所以我们需要使用 `"0"` 来进行对应的匹配,否则 `ifnull` 函数无法找到参数为 `(varchar, int)` 的函数签名,将出现错误。 - - 再举例,假设用户需要导入只包含 `k1` 一列的表,列类型为 `int`。并且需要将源文件中的对应列进行处理:将负数转换为正数,而将正数乘以 100。这个功能可以通过 `case when` 函数实现,正确写法应如下: - - `COLUMNS (xx, k1 = case when xx < 0 then cast(-xx as varchar) else cast((xx + '100') as varchar) end)` - - 注意这里我们需要将 `case when` 中所有的参数都最终转换为 varchar,才能得到期望的结果。 - -* where_predicates - - `where_predicates` 中的的列的类型,已经是实际的列类型了,所以无需向 `columns_mapping` 那样强制的转换为 varchar 类型。按照实际的列类型书写即可。 - -* desired\_concurrent\_number - - `desired_concurrent_number` 用于指定一个例行作业期望的并发度。即一个作业,最多有多少 task 同时在执行。对于 Kafka 导入而言,当前的实际并发度计算如下: - - ``` - Min(partition num, desired_concurrent_number, Config.max_routine_load_task_concurrrent_num) - ``` - - 其中 `Config.max_routine_load_task_concurrrent_num` 是系统的一个默认的最大并发数限制。这是一个 FE 配置,可以通过改配置调整。默认为 5。 - - 其中 partition num 指订阅的 Kafka topic 的 partition 数量。 - -* max\_batch\_interval/max\_batch\_rows/max\_batch\_size - - 这三个参数用于控制单个任务的执行时间。其中任意一个阈值达到,则任务结束。其中 `max_batch_rows` 用于记录从 Kafka 中读取到的数据行数。`max_batch_size` 用于记录从 Kafka 中读取到的数据量,单位是字节。目前一个任务的消费速率大约为 5-10MB/s。 - - 那么假设一行数据 500B,用户希望每 100MB 或 10 秒为一个 task。100MB 的预期处理时间是 10-20 秒,对应的行数约为 200000 行。则一个合理的配置为: - - ``` - "max_batch_interval" = "10", - "max_batch_rows" = "200000", - "max_batch_size" = "104857600" - ``` - - 以上示例中的参数也是这些配置的默认参数。 - -* max\_error\_number - - `max_error_number` 用于控制错误率。在错误率过高的时候,作业会自动暂停。因为整个作业是面向数据流的,且由于数据流的无边界性,我们无法像其他导入任务一样,通过一个错误比例来计算错误率。因此这里提供了一种新的计算方式,来计算数据流中的错误比例。 - - 我们设定了一个采样窗口。窗口的大小为 `max_batch_rows * 10`。在一个采样窗口内,如果错误行数超过 `max_error_number`,则作业被暂停。如果没有超过,则下一个窗口重新开始计算错误行数。 - - 我们假设 `max_batch_rows` 为 
200000,则窗口大小为 2000000。设 `max_error_number` 为 20000,即用户预期每 2000000 行的错误行为 20000。即错误率为 1%。但是因为不是每批次任务正好消费 200000 行,所以窗口的实际范围是 [2000000, 2200000],即有 10% 的统计误差。 - - 错误行不包括通过 where 条件过滤掉的行。但是包括没有对应的 Doris 表中的分区的行。 - -* data\_source\_properties - - `data_source_properties` 中可以指定消费具体的 Kafka partition。如果不指定,则默认消费所订阅的 topic 的所有 partition。 - - 注意,当显式的指定了 partition,则导入作业不会再动态的检测 Kafka partition 的变化。如果没有指定,则会根据 kafka partition 的变化,动态调整需要消费的 partition。 - -* strict\_mode - - Routine load 导入可以开启 strict mode 模式。开启方式为在 job\_properties 中增加 ```"strict_mode" = "true"``` 。默认的 strict mode 为关闭。 - - strict mode 模式的意思是:对于导入过程中的列类型转换进行严格过滤。严格过滤的策略如下: - - 1. 对于列类型转换来说,如果 strict mode 为true,则错误的数据将被 filter。这里的错误数据是指:原始数据并不为空值,在参与列类型转换后结果为空值的这一类数据。 - - 2. 对于导入的某列由函数变换生成时,strict mode 对其不产生影响。 - - 3. 对于导入的某列类型包含范围限制的,如果原始数据能正常通过类型转换,但无法通过范围限制的,strict mode 对其也不产生影响。例如:如果类型是 decimal(1,0), 原始数据为 10,则属于可以通过类型转换但不在列声明的范围内。这种数据 strict 对其不产生影响。 -* merge\_type - 数据的合并类型,一共支持三种类型APPEND、DELETE、MERGE 其中,APPEND是默认值,表示这批数据全部需要追加到现有数据中,DELETE 表示删除与这批数据key相同的所有行,MERGE 语义 需要与delete 条件联合使用,表示满足delete 条件的数据按照DELETE 语义处理其余的按照APPEND 语义处理 - -#### strict mode 与 source data 的导入关系 - -这里以列类型为 TinyInt 来举例 - ->注:当表中的列允许导入空值时 - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|---------| -|空值 | \N | N/A | true or false | NULL| -|not null | aaa or 2000 | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 | 1 | true or false | correct data| - -这里以列类型为 Decimal(1,0) 举例 - ->注:当表中的列允许导入空值时 - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|--------| -|空值 | \N | N/A | true or false | NULL| -|not null | aaa | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 or 10 | 1 | true or false | correct data| - -> 注意:10 虽然是一个超过范围的值,但是因为其类型符合 decimal的要求,所以 strict mode对其不产生影响。10 最后会在其他 ETL 处理流程中被过滤。但不会被 strict mode 过滤。 - -#### 访问 SSL 认证的 Kafka 集群 - -访问 SSL 认证的 Kafka 集群需要用户提供用于认证 Kafka Broker 公钥的证书文件(ca.pem)。如果 Kafka 集群同时开启了客户端认证,则还需提供客户端的公钥(client.pem)、密钥文件(client.key),以及密钥密码。这里所需的文件需要先通过 `CREAE FILE` 命令上传到 Doris 中,**并且 catalog 名称为 `kafka`**。`CREATE FILE` 命令的具体帮助可以参见 `HELP CREATE FILE;`。这里给出示例: - -1. 上传文件 - - ``` - CREATE FILE "ca.pem" PROPERTIES("url" = "https://example_url/kafka-key/ca.pem", "catalog" = "kafka"); - CREATE FILE "client.key" PROPERTIES("url" = "https://example_urlkafka-key/client.key", "catalog" = "kafka"); - CREATE FILE "client.pem" PROPERTIES("url" = "https://example_url/kafka-key/client.pem", "catalog" = "kafka"); - ``` - -2. 
创建例行导入作业 - - ``` - CREATE ROUTINE LOAD db1.job1 on tbl1 - PROPERTIES - ( - "desired_concurrent_number"="1" - ) - FROM KAFKA - ( - "kafka_broker_list"= "broker1:9091,broker2:9091", - "kafka_topic" = "my_topic", - "property.security.protocol" = "ssl", - "property.ssl.ca.location" = "FILE:ca.pem", - "property.ssl.certificate.location" = "FILE:client.pem", - "property.ssl.key.location" = "FILE:client.key", - "property.ssl.key.password" = "abcdefg" - ); - ``` - -> Doris 通过 Kafka 的 C++ API `librdkafka` 来访问 Kafka 集群。`librdkafka` 所支持的参数可以参阅 -> -> - - -### 查看导入作业状态 - -查看**作业**状态的具体命令和示例可以通过 `HELP SHOW ROUTINE LOAD;` 命令查看。 - -查看**任务**运行状态的具体命令和示例可以通过 `HELP SHOW ROUTINE LOAD TASK;` 命令查看。 - -只能查看当前正在运行中的任务,已结束和未开始的任务无法查看。 - -### 修改作业属性 - -用户可以修改已经创建的作业。具体说明可以通过 `HELP ALTER ROUTINE LOAD;` 命令查看。或参阅 [ALTER ROUTINE LOAD](../../sql-reference/sql-statements/Data%20Manipulation/alter-routine-load.md)。 - -### 作业控制 - -用户可以通过 `STOP/PAUSE/RESUME` 三个命令来控制作业的停止,暂停和重启。可以通过 `HELP STOP ROUTINE LOAD;`, `HELP PAUSE ROUTINE LOAD;` 以及 `HELP RESUME ROUTINE LOAD;` 三个命令查看帮助和示例。 - -## 其他说明 - -1. 例行导入作业和 ALTER TABLE 操作的关系 - - * 例行导入不会阻塞 SCHEMA CHANGE 和 ROLLUP 操作。但是注意如果 SCHEMA CHANGE 完成后,列映射关系无法匹配,则会导致作业的错误数据激增,最终导致作业暂停。建议通过在例行导入作业中显式指定列映射关系,以及通过增加 Nullable 列或带 Default 值的列来减少这类问题。 - * 删除表的 Partition 可能会导致导入数据无法找到对应的 Partition,作业进入暂停。 - -2. 例行导入作业和其他导入作业的关系(LOAD, DELETE, INSERT) - - * 例行导入和其他 LOAD 作业以及 INSERT 操作没有冲突。 - * 当执行 DELETE 操作时,对应表分区不能有任何正在执行的导入任务。所以在执行 DELETE 操作前,可能需要先暂停例行导入作业,并等待已下发的 task 全部完成后,才可以执行 DELETE。 - -3. 例行导入作业和 DROP DATABASE/TABLE 操作的关系 - - 当例行导入对应的 database 或 table 被删除后,作业会自动 CANCEL。 - -4. kafka 类型的例行导入作业和 kafka topic 的关系 - - 当用户在创建例行导入声明的 `kafka_topic` 在kafka集群中不存在时。 - - * 如果用户 kafka 集群的 broker 设置了 `auto.create.topics.enable = true`,则 `kafka_topic` 会先被自动创建,自动创建的 partition 个数是由**用户方的kafka集群**中的 broker 配置 `num.partitions` 决定的。例行作业会正常的不断读取该 topic 的数据。 - * 如果用户 kafka 集群的 broker 设置了 `auto.create.topics.enable = false`, 则 topic 不会被自动创建,例行作业会在没有读取任何数据之前就被暂停,状态为 `PAUSED`。 - - 所以,如果用户希望当 kafka topic 不存在的时候,被例行作业自动创建的话,只需要将**用户方的kafka集群**中的 broker 设置 `auto.create.topics.enable = true` 即可。 - 5. 在网络隔离的环境中可能出现的问题 - 在有些环境中存在网段和域名解析的隔离措施,所以需要注意 - 1. 创建Routine load 任务中指定的 Broker list 必须能够被Doris服务访问 - 2. Kafka 中如果配置了`advertised.listeners`, `advertised.listeners` 中的地址必须能够被Doris服务访问 - -6. 关于指定消费的 Partition 和 Offset - - Doris 支持指定 Partition 和 Offset 开始消费。新版中还支持了指定时间点进行消费的功能。这里说明下对应参数的配置关系。 - - 有三个相关参数: - - * `kafka_partitions`:指定待消费的 partition 列表,如:"0, 1, 2, 3"。 - * `kafka_offsets`:指定每个分区的起始offset,必须和 `kafka_partitions` 列表个数对应。如:"1000, 1000, 2000, 2000" - * `property.kafka_default_offset`:指定分区默认的起始offset。 - - 在创建导入作业时,这三个参数可以有以下组合: - - | 组合 | `kafka_partitions` | `kafka_offsets` | `property.kafka_default_offset` | 行为 | - |---|---|---|---|---| - |1| No | No | No | 系统会自动查找topic对应的所有分区并从 OFFSET_END 开始消费 | - |2| No | No | Yes | 系统会自动查找topic对应的所有分区并从 default offset 指定的位置开始消费| - |3| Yes | No | No | 系统会从指定分区的 OFFSET_END 开始消费 | - |4| Yes | Yes | No | 系统会从指定分区的指定offset 处开始消费 | - |5| Yes | No | Yes | 系统会从指定分区,default offset 指定的位置开始消费 | - - 7. STOP和PAUSE的区别 - - FE会自动定期清理STOP状态的ROUTINE LOAD,而PAUSE状态的则可以再次被恢复启用。 - -## 相关参数 - -一些系统配置参数会影响例行导入的使用。 - -1. max\_routine\_load\_task\_concurrent\_num - - FE 配置项,默认为 5,可以运行时修改。该参数限制了一个例行导入作业最大的子任务并发数。建议维持默认值。设置过大,可能导致同时并发的任务数过多,占用集群资源。 - -2. max\_routine_load\_task\_num\_per\_be - - FE 配置项,默认为5,可以运行时修改。该参数限制了每个 BE 节点最多并发执行的子任务个数。建议维持默认值。如果设置过大,可能导致并发任务数过多,占用集群资源。 - -3. max\_routine\_load\_job\_num - - FE 配置项,默认为100,可以运行时修改。该参数限制的例行导入作业的总数,包括 NEED_SCHEDULED, RUNNING, PAUSE 这些状态。超过后,不能在提交新的作业。 - -4. 
max\_consumer\_num\_per\_group - - BE 配置项,默认为 3。该参数表示一个子任务中最多生成几个 consumer 进行数据消费。对于 Kafka 数据源,一个 consumer 可能消费一个或多个 kafka partition。假设一个任务需要消费 6 个 kafka partition,则会生成 3 个 consumer,每个 consumer 消费 2 个 partition。如果只有 2 个 partition,则只会生成 2 个 consumer,每个 consumer 消费 1 个 partition。 - -5. push\_write\_mbytes\_per\_sec - - BE 配置项。默认为 10,即 10MB/s。该参数为导入通用参数,不限于例行导入作业。该参数限制了导入数据写入磁盘的速度。对于 SSD 等高性能存储设备,可以适当增加这个限速。 - -6. max\_tolerable\_backend\_down\_num - FE 配置项,默认值是0。在满足某些条件下,Doris可PAUSED的任务重新调度,即变成RUNNING。该参数为0代表只有所有BE节点是alive状态才允许重新调度。 - -7. period\_of\_auto\_resume\_min - FE 配置项,默认是5分钟。Doris重新调度,只会在5分钟这个周期内,最多尝试3次. 如果3次都失败则锁定当前任务,后续不在进行调度。但可通过人为干预,进行手动恢复。 - -## keyword - ROUTINE,LOAD diff --git a/docs/zh-CN/administrator-guide/load-data/s3-load-manual.md b/docs/zh-CN/administrator-guide/load-data/s3-load-manual.md deleted file mode 100644 index 3c9b6c5d88..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/s3-load-manual.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -{ -"title": "S3 Load", -"language": "zh-CN" -} ---- - - - -# S3 Load - -从0.14 版本开始,Doris 支持通过S3协议直接从支持S3协议的在线存储系统导入数据。 - -本文档主要介绍如何导入 AWS S3 中存储的数据。也支持导入其他支持S3协议的对象存储系统导入,如果百度云的BOS,阿里云的OSS和腾讯云的COS等、 - -## 适用场景 - -* 源数据在 支持S3协议的存储系统中,如 S3,BOS 等。 -* 数据量在 几十到百GB 级别。 - -## 准备工作 -1. 准本AK 和 SK - 首先需要找到或者重新生成 AWS `Access keys`,可以在AWS console 的 `My Security Credentials` 找到生成方式, 如下图所示: - [AK_SK](/images/aws_ak_sk.png) - 选择 `Create New Access Key` 注意保存生成 AK和SK. -2. 准备 REGION 和 ENDPOINT - REGION 可以在创建桶的时候选择也可以在桶列表中查看到。ENDPOINT 可以通过如下页面通过REGION查到 [AWS 文档](https://docs.aws.amazon.com/general/latest/gr/s3.html#s3_region) - -其他云存储系统可以相应的文档找到与S3兼容的相关信息 - -## 开始导入 -导入方式和Broker Load 基本相同,只需要将 `WITH BROKER broker_name ()` 语句替换成如下部分 -``` - WITH S3 - ( - "AWS_ENDPOINT" = "AWS_ENDPOINT", - "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY", - "AWS_SECRET_KEY"="AWS_SECRET_KEY", - "AWS_REGION" = "AWS_REGION" - ) -``` - -完整示例如下 -``` - LOAD LABEL example_db.exmpale_label_1 - ( - DATA INFILE("s3://your_bucket_name/your_file.txt") - INTO TABLE load_test - COLUMNS TERMINATED BY "," - ) - WITH S3 - ( - "AWS_ENDPOINT" = "AWS_ENDPOINT", - "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY", - "AWS_SECRET_KEY"="AWS_SECRET_KEY", - "AWS_REGION" = "AWS_REGION" - ) - PROPERTIES - ( - "timeout" = "3600" - ); -``` - -## 常见问题 - -S3 SDK 默认使用 virtual-hosted style 方式。但某些对象存储系统可能没开启或没支持 virtual-hosted style 方式的访问,此时我们可以添加 `use_path_style` 参数来强制使用 path style 方式: - -``` - WITH S3 - ( - "AWS_ENDPOINT" = "AWS_ENDPOINT", - "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY", - "AWS_SECRET_KEY"="AWS_SECRET_KEY", - "AWS_REGION" = "AWS_REGION", - "use_path_style" = "true" - ) -``` diff --git a/docs/zh-CN/administrator-guide/load-data/sequence-column-manual.md b/docs/zh-CN/administrator-guide/load-data/sequence-column-manual.md deleted file mode 100644 index e9ffd0d16f..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/sequence-column-manual.md +++ /dev/null @@ -1,208 +0,0 @@ ---- -{ - "title": "sequence列", - "language": "zh-CN" -} ---- - - - -# sequence列 -sequence列目前只支持Uniq模型,Uniq模型主要针对需要唯一主键的场景,可以保证主键唯一性约束,但是由于使用REPLACE聚合方式,在同一批次中导入的数据,替换顺序不做保证,详细介绍可以参考[这里](../../getting-started/data-model-rollup.md)。替换顺序无法保证则无法确定最终导入到表中的具体数据,存在了不确定性。 - -为了解决这个问题,Doris支持了sequence列,通过用户在导入时指定sequence列,相同key列下,REPLACE聚合类型的列将按照sequence列的值进行替换,较大值可以替换较小值,反之则无法替换。该方法将顺序的确定交给了用户,由用户控制替换顺序。 - -## 原理 -通过增加一个隐藏列`__DORIS_SEQUENCE_COL__`实现,该列的类型由用户在建表时指定,在导入时确定该列具体值,并依据该值对REPLACE列进行替换。 - -### 建表 - -创建Uniq表时,将按照用户指定类型自动添加一个隐藏列`__DORIS_SEQUENCE_COL__` - -### 导入 - -导入时,fe在解析的过程中将隐藏列的值设置成 `order by` 表达式的值(broker load和routine 
load),或者`function_column.sequence_col`表达式的值(stream load), value列将按照该值进行替换。隐藏列`__DORIS_SEQUENCE_COL__`的值既可以设置为数据源中一列,也可以是表结构中的一列。 - -### 读取 - -请求包含value列时需要需要额外读取`__DORIS_SEQUENCE_COL__`列,该列用于在相同key列下,REPLACE聚合函数替换顺序的依据,较大值可以替换较小值,反之则不能替换。 - -### Cumulative Compaction - -Cumulative Compaction 时和读取过程原理相同 - -### Base Compaction - -Base Compaction 时读取过程原理相同 - -### 语法 -建表时语法方面在property中增加了一个属性,用来标识`__DORIS_SEQUENCE_COL__`的类型 -导入的语法设计方面主要是增加一个从sequence列的到其他column的映射,各个导入方式设置的将在下面介绍 - -#### 建表 -创建Uniq表时,可以指定sequence列类型 -``` -PROPERTIES ( - "function_column.sequence_type" = 'Date', -); -``` -sequence_type用来指定sequence列的类型,可以为整型和时间类型 - -#### stream load - -stream load 的写法是在header中的`function_column.sequence_col`字段添加隐藏列对应的source_sequence的映射, 示例 -``` -curl --location-trusted -u root -H "columns: k1,k2,source_sequence,v1,v2" -H "function_column.sequence_col: source_sequence" -T testData http://host:port/api/testDb/testTbl/_stream_load -``` - -#### broker load - -在`ORDER BY` 处设置隐藏列映射的source_sequence字段 - -``` -LOAD LABEL db1.label1 -( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl1` - COLUMNS TERMINATED BY "," - (k1,k2,source_sequence,v1,v2) - ORDER BY source_sequence -) -WITH BROKER 'broker' -( - "username"="user", - "password"="pass" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -#### routine load - -映射方式同上,示例如下 - -``` - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - [WITH MERGE|APPEND|DELETE] - COLUMNS(k1, k2, source_sequence, v1, v2), - WHERE k1 > 100 and k2 like "%doris%" - [ORDER BY source_sequence] - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,0,200" - ); -``` - -## 启用sequence column支持 -在新建表时如果设置了`function_column.sequence_type` ,则新建表将支持sequence column。 -对于一个不支持sequence column的表,如果想要使用该功能,可以使用如下语句: -`ALTER TABLE example_db.my_table ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date")` 来启用。 -如果确定一个表是否支持sequence column,可以通过设置一个session variable来显示隐藏列 `SET show_hidden_columns=true` ,之后使用`desc tablename`,如果输出中有`__DORIS_SEQUENCE_COL__` 列则支持,如果没有则不支持 - -## 使用示例 -下面以stream load 为例 展示下使用方式 -1. 创建支持sequence column的表 - -表结构如下: -``` -MySQL > desc test_table; -+-------------+--------------+------+-------+---------+---------+ -| Field | Type | Null | Key | Default | Extra | -+-------------+--------------+------+-------+---------+---------+ -| user_id | BIGINT | No | true | NULL | | -| date | DATE | No | true | NULL | | -| group_id | BIGINT | No | true | NULL | | -| modify_date | DATE | No | false | NULL | REPLACE | -| keyword | VARCHAR(128) | No | false | NULL | REPLACE | -+-------------+--------------+------+-------+---------+---------+ -``` - -2. 
正常导入数据: - -导入如下数据 -``` -1 2020-02-22 1 2020-02-22 a -1 2020-02-22 1 2020-02-22 b -1 2020-02-22 1 2020-03-05 c -1 2020-02-22 1 2020-02-26 d -1 2020-02-22 1 2020-02-22 e -1 2020-02-22 1 2020-02-22 b -``` -此处以stream load为例, 将sequence column映射为modify_date列 -``` -curl --location-trusted -u root: -H "function_column.sequence_col: modify_date" -T testData http://host:port/api/test/test_table/_stream_load -``` -结果为 -``` -MySQL > select * from test_table; -+---------+------------+----------+-------------+---------+ -| user_id | date | group_id | modify_date | keyword | -+---------+------------+----------+-------------+---------+ -| 1 | 2020-02-22 | 1 | 2020-03-05 | c | -+---------+------------+----------+-------------+---------+ -``` -在这次导入中,因sequence column的值(也就是modify_date中的值)中'2020-03-05'为最大值,所以keyword列中最终保留了c。 - -3. 替换顺序的保证 - -上述步骤完成后,接着导入如下数据 -``` -1 2020-02-22 1 2020-02-22 a -1 2020-02-22 1 2020-02-23 b -``` -查询数据 -``` -MySQL [test]> select * from test_table; -+---------+------------+----------+-------------+---------+ -| user_id | date | group_id | modify_date | keyword | -+---------+------------+----------+-------------+---------+ -| 1 | 2020-02-22 | 1 | 2020-03-05 | c | -+---------+------------+----------+-------------+---------+ -``` -由于新导入的数据的sequence column都小于表中已有的值,无法替换 -再尝试导入如下数据 -``` -1 2020-02-22 1 2020-02-22 a -1 2020-02-22 1 2020-03-23 w -``` -查询数据 -``` -MySQL [test]> select * from test_table; -+---------+------------+----------+-------------+---------+ -| user_id | date | group_id | modify_date | keyword | -+---------+------------+----------+-------------+---------+ -| 1 | 2020-02-22 | 1 | 2020-03-23 | w | -+---------+------------+----------+-------------+---------+ -``` -此时就可以替换表中原有的数据 \ No newline at end of file diff --git a/docs/zh-CN/administrator-guide/load-data/spark-load-manual.md b/docs/zh-CN/administrator-guide/load-data/spark-load-manual.md deleted file mode 100644 index 4f1666275d..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/spark-load-manual.md +++ /dev/null @@ -1,596 +0,0 @@ ---- -{ - "title": "Spark Load", - "language": "zh-CN" -} ---- - - - -# Spark Load - -Spark load 通过外部的 Spark 资源实现对导入数据的预处理,提高 Doris 大数据量的导入性能并且节省 Doris 集群的计算资源。主要用于初次迁移,大数据量导入 Doris 的场景。 - -Spark load 是一种异步导入方式,用户需要通过 MySQL 协议创建 Spark 类型导入任务,并通过 `SHOW LOAD` 查看导入结果。 - -## 适用场景 - -* 源数据在 Spark 可以访问的存储系统中,如 HDFS。 -* 数据量在 几十 GB 到 TB 级别。 - -## 名词解释 - -1. Frontend(FE):Doris 系统的元数据和调度节点。在导入流程中主要负责导入任务的调度工作。 -2. Backend(BE):Doris 系统的计算和存储节点。在导入流程中主要负责数据写入及存储。 -3. Spark ETL:在导入流程中主要负责数据的 ETL 工作,包括全局字典构建(BITMAP类型)、分区、排序、聚合等。 -4. Broker:Broker 为一个独立的无状态进程。封装了文件系统接口,提供 Doris 读取远端存储系统中文件的能力。 -5. 全局字典: 保存了数据从原始值到编码值映射的数据结构,原始值可以是任意数据类型,而编码后的值为整型;全局字典主要应用于精确去重预计算的场景。 - -## 基本原理 - -### 基本流程 - -用户通过 MySQL 客户端提交 Spark 类型导入任务,FE记录元数据并返回用户提交成功。 - -Spark load 任务的执行主要分为以下5个阶段。 - -1. FE 调度提交 ETL 任务到 Spark 集群执行。 -2. Spark 集群执行 ETL 完成对导入数据的预处理。包括全局字典构建(BITMAP类型)、分区、排序、聚合等。 -3. ETL 任务完成后,FE 获取预处理过的每个分片的数据路径,并调度相关的 BE 执行 Push 任务。 -4. BE 通过 Broker 读取数据,转化为 Doris 底层存储格式。 -5. FE 调度生效版本,完成导入任务。 - -``` - + - | 0. User create spark load job - +----v----+ - | FE |---------------------------------+ - +----+----+ | - | 3. FE send push tasks | - | 5. FE publish version | - +------------+------------+ | - | | | | -+---v---+ +---v---+ +---v---+ | -| BE | | BE | | BE | |1. FE submit Spark ETL job -+---^---+ +---^---+ +---^---+ | - |4. 
BE push with broker | | -+---+---+ +---+---+ +---+---+ | -|Broker | |Broker | |Broker | | -+---^---+ +---^---+ +---^---+ | - | | | | -+---+------------+------------+---+ 2.ETL +-------------v---------------+ -| HDFS +-------> Spark cluster | -| <-------+ | -+---------------------------------+ +-----------------------------+ - -``` - -## 全局字典 -### 适用场景 -目前Doris中Bitmap列是使用类库```Roaringbitmap```实现的,而```Roaringbitmap```的输入数据类型只能是整型,因此如果要在导入流程中实现对于Bitmap列的预计算,那么就需要将输入数据的类型转换成整型。 - -在Doris现有的导入流程中,全局字典的数据结构是基于Hive表实现的,保存了原始值到编码值的映射。 -### 构建流程 -1. 读取上游数据源的数据,生成一张hive临时表,记为`hive_table`。 -2. 从`hive_table`中抽取待去重字段的去重值,生成一张新的hive表,记为`distinct_value_table`。 -3. 新建一张全局字典表,记为`dict_table`;一列为原始值,一列为编码后的值。 -4. 将`distinct_value_table`与`dict_table`做left join,计算出新增的去重值集合,然后对这个集合使用窗口函数进行编码,此时去重列原始值就多了一列编码后的值,最后将这两列的数据写回`dict_table`。 -5. 将`dict_table`与`hive_table`做join,完成`hive_table`中原始值替换成整型编码值的工作。 -6. `hive_table`会被下一步数据预处理的流程所读取,经过计算后导入到Doris中。 - -## 数据预处理(DPP) -### 基本流程 -1. 从数据源读取数据,上游数据源可以是HDFS文件,也可以是Hive表。 -2. 对读取到的数据进行字段映射,表达式计算以及根据分区信息生成分桶字段`bucket_id`。 -3. 根据Doris表的rollup元数据生成RollupTree。 -4. 遍历RollupTree,进行分层的聚合操作,下一个层级的rollup可以由上一个层的rollup计算得来。 -5. 每次完成聚合计算后,会对数据根据`bucket_id`进行分桶然后写入HDFS中。 -6. 后续broker会拉取HDFS中的文件然后导入Doris Be中。 - -## Hive Bitmap UDF - -Spark 支持将 hive 生成的 bitmap 数据直接导入到 Doris。详见 [hive-bitmap-udf 文档](../../extending-doris/hive-bitmap-udf.md) - -## 基本操作 - -### 配置 ETL 集群 - -Spark作为一种外部计算资源在Doris中用来完成ETL工作,未来可能还有其他的外部资源会加入到Doris中使用,如Spark/GPU用于查询,HDFS/S3用于外部存储,MapReduce用于ETL等,因此我们引入resource management来管理Doris使用的这些外部资源。 - -提交 Spark 导入任务之前,需要配置执行 ETL 任务的 Spark 集群。 - -语法: - -```sql --- create spark resource -CREATE EXTERNAL RESOURCE resource_name -PROPERTIES -( - type = spark, - spark_conf_key = spark_conf_value, - working_dir = path, - broker = broker_name, - broker.property_key = property_value -) - --- drop spark resource -DROP RESOURCE resource_name - --- show resources -SHOW RESOURCES -SHOW PROC "/resources" - --- privileges -GRANT USAGE_PRIV ON RESOURCE resource_name TO user_identity -GRANT USAGE_PRIV ON RESOURCE resource_name TO ROLE role_name - -REVOKE USAGE_PRIV ON RESOURCE resource_name FROM user_identity -REVOKE USAGE_PRIV ON RESOURCE resource_name FROM ROLE role_name -``` - -#### 创建资源 - -`resource_name` 为 Doris 中配置的 Spark 资源的名字。 - -`PROPERTIES` 是 Spark 资源相关参数,如下: - -- `type`:资源类型,必填,目前仅支持 spark。 - -- Spark 相关参数如下: - - `spark.master`: 必填,目前支持yarn,spark://host:port。 - - `spark.submit.deployMode`: Spark 程序的部署模式,必填,支持 cluster,client 两种。 - - `spark.hadoop.yarn.resourcemanager.address`: master为yarn时必填。 - - `spark.hadoop.fs.defaultFS`: master为yarn时必填。 - - 其他参数为可选,参考http://spark.apache.org/docs/latest/configuration.html -- `working_dir`: ETL 使用的目录。spark作为ETL资源使用时必填。例如:hdfs://host:port/tmp/doris。 -- `broker`: broker 名字。spark作为ETL资源使用时必填。需要使用`ALTER SYSTEM ADD BROKER` 命令提前完成配置。 - - `broker.property_key`: broker读取ETL生成的中间文件时需要指定的认证信息等。 - -示例: - -```sql --- yarn cluster 模式 -CREATE EXTERNAL RESOURCE "spark0" -PROPERTIES -( - "type" = "spark", - "spark.master" = "yarn", - "spark.submit.deployMode" = "cluster", - "spark.jars" = "xxx.jar,yyy.jar", - "spark.files" = "/tmp/aaa,/tmp/bbb", - "spark.executor.memory" = "1g", - "spark.yarn.queue" = "queue0", - "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999", - "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000", - "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris", - "broker" = "broker0", - "broker.username" = "user0", - "broker.password" = "password0" -); - --- spark standalone client 模式 -CREATE EXTERNAL RESOURCE "spark1" -PROPERTIES -( 
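    -- 示例说明:standalone client 模式下,spark.master 填写实际的 spark://host:port 地址;
    -- ETL 中间结果会写入 working_dir 指定的目录,并经由下面指定的 broker 读取。
    -- 以下地址、目录与 broker 名称均为示例值,请按实际环境替换。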
- "type" = "spark", - "spark.master" = "spark://127.0.0.1:7777", - "spark.submit.deployMode" = "client", - "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris", - "broker" = "broker1" -); -``` - -#### 查看资源 - -普通账户只能看到自己有USAGE_PRIV使用权限的资源。 - -root和admin账户可以看到所有的资源。 - -#### 资源权限 - -资源权限通过GRANT REVOKE来管理,目前仅支持USAGE_PRIV使用权限。 - -可以将USAGE_PRIV权限赋予某个用户或者某个角色,角色的使用与之前一致。 - -```sql --- 授予spark0资源的使用权限给用户user0 -GRANT USAGE_PRIV ON RESOURCE "spark0" TO "user0"@"%"; - --- 授予spark0资源的使用权限给角色role0 -GRANT USAGE_PRIV ON RESOURCE "spark0" TO ROLE "role0"; - --- 授予所有资源的使用权限给用户user0 -GRANT USAGE_PRIV ON RESOURCE * TO "user0"@"%"; - --- 授予所有资源的使用权限给角色role0 -GRANT USAGE_PRIV ON RESOURCE * TO ROLE "role0"; - --- 撤销用户user0的spark0资源使用权限 -REVOKE USAGE_PRIV ON RESOURCE "spark0" FROM "user0"@"%"; -``` - -### 配置 SPARK 客户端 - -FE底层通过执行spark-submit的命令去提交spark任务,因此需要为FE配置spark客户端,建议使用2.4.5或以上的spark2官方版本,[spark下载地址](https://archive.apache.org/dist/spark/),下载完成后,请按步骤完成以下配置。 - -#### 配置 SPARK_HOME 环境变量 - -将spark客户端放在FE同一台机器上的目录下,并在FE的配置文件配置`spark_home_default_dir`项指向此目录,此配置项默认为FE根目录下的 `lib/spark2x`路径,此项不可为空。 - -#### 配置 SPARK 依赖包 - -将spark客户端下的jars文件夹内所有jar包归档打包成一个zip文件,并在FE的配置文件配置`spark_resource_path`项指向此zip文件,若此配置项为空,则FE会尝试寻找FE根目录下的`lib/spark2x/jars/spark-2x.zip`文件,若没有找到则会报文件不存在的错误。 - -当提交spark load任务时,会将归档好的依赖文件上传至远端仓库,默认仓库路径挂在`working_dir/{cluster_id}`目录下,并以`__spark_repository__{resource_name}`命名,表示集群内的一个resource对应一个远端仓库,远端仓库目录结构参考如下: - -``` -__spark_repository__spark0/ - |-__archive_1.0.0/ - | |-__lib_990325d2c0d1d5e45bf675e54e44fb16_spark-dpp-1.0.0-jar-with-dependencies.jar - | |-__lib_7670c29daf535efe3c9b923f778f61fc_spark-2x.zip - |-__archive_1.1.0/ - | |-__lib_64d5696f99c379af2bee28c1c84271d5_spark-dpp-1.1.0-jar-with-dependencies.jar - | |-__lib_1bbb74bb6b264a270bc7fca3e964160f_spark-2x.zip - |-__archive_1.2.0/ - | |-... -``` - -除了spark依赖(默认以`spark-2x.zip`命名),FE还会上传DPP的依赖包至远端仓库,若此次spark load提交的所有依赖文件都已存在远端仓库,那么就不需要在上传依赖,省下原来每次重复上传大量文件的时间。 - -### 配置 YARN 客户端 - -FE底层通过执行yarn命令去获取正在运行的application的状态以及杀死application,因此需要为FE配置yarn客户端,建议使用2.5.2或以上的hadoop2官方版本,[hadoop下载地址](https://archive.apache.org/dist/hadoop/common/),下载完成后,请按步骤完成以下配置。 - -#### 配置 YARN 可执行文件路径 - -将下载好的yarn客户端放在FE同一台机器的目录下,并在FE配置文件配置`yarn_client_path`项指向yarn的二进制可执行文件,默认为FE根目录下的`lib/yarn-client/hadoop/bin/yarn`路径。 - -(可选) 当FE通过yarn客户端去获取application的状态或者杀死application时,默认会在FE根目录下的`lib/yarn-config`路径下生成执行yarn命令所需的配置文件,此路径可通过在FE配置文件配置`yarn_config_dir`项修改,目前生成的配置文件包括`core-site.xml`和`yarn-site.xml`。 - -### 创建导入 - -语法: - -```sql -LOAD LABEL load_label - (data_desc, ...) - WITH RESOURCE resource_name - [resource_properties] - [PROPERTIES (key1=value1, ... )] - -* load_label: - db_name.label_name - -* data_desc: - DATA INFILE ('file_path', ...) - [NEGATIVE] - INTO TABLE tbl_name - [PARTITION (p1, p2)] - [COLUMNS TERMINATED BY separator ] - [(col1, ...)] - [COLUMNS FROM PATH AS (col2, ...)] - [SET (k1=f1(xx), k2=f2(xx))] - [WHERE predicate] - - DATA FROM TABLE hive_external_tbl - [NEGATIVE] - INTO TABLE tbl_name - [PARTITION (p1, p2)] - [SET (k1=f1(xx), k2=f2(xx))] - [WHERE predicate] - -* resource_properties: - (key2=value2, ...) 
-``` -示例1:上游数据源为hdfs文件的情况 - -```sql -LOAD LABEL db1.label1 -( - DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file1") - INTO TABLE tbl1 - COLUMNS TERMINATED BY "," - (tmp_c1,tmp_c2) - SET - ( - id=tmp_c2, - name=tmp_c1 - ), - DATA INFILE("hdfs://abc.com:8888/user/palo/test/ml/file2") - INTO TABLE tbl2 - COLUMNS TERMINATED BY "," - (col1, col2) - where col1 > 1 -) -WITH RESOURCE 'spark0' -( - "spark.executor.memory" = "2g", - "spark.shuffle.compress" = "true" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -示例2:上游数据源是hive表的情况 - -```sql -step 1:新建hive外部表 -CREATE EXTERNAL TABLE hive_t1 -( - k1 INT, - K2 SMALLINT, - k3 varchar(50), - uuid varchar(100) -) -ENGINE=hive -properties -( -"database" = "tmp", -"table" = "t1", -"hive.metastore.uris" = "thrift://0.0.0.0:8080" -); - -step 2: 提交load命令,要求导入的 doris 表中的列必须在 hive 外部表中存在。 -LOAD LABEL db1.label1 -( - DATA FROM TABLE hive_t1 - INTO TABLE tbl1 - SET - ( - uuid=bitmap_dict(uuid) - ) -) -WITH RESOURCE 'spark0' -( - "spark.executor.memory" = "2g", - "spark.shuffle.compress" = "true" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -示例3:上游数据源是hive binary类型情况 - -```sql -step 1:新建hive外部表 -CREATE EXTERNAL TABLE hive_t1 -( - k1 INT, - K2 SMALLINT, - k3 varchar(50), - uuid varchar(100) //hive中的类型为binary -) -ENGINE=hive -properties -( -"database" = "tmp", -"table" = "t1", -"hive.metastore.uris" = "thrift://0.0.0.0:8080" -); - -step 2: 提交load命令,要求导入的 doris 表中的列必须在 hive 外部表中存在。 -LOAD LABEL db1.label1 -( - DATA FROM TABLE hive_t1 - INTO TABLE tbl1 - SET - ( - uuid=binary_bitmap(uuid) - ) -) -WITH RESOURCE 'spark0' -( - "spark.executor.memory" = "2g", - "spark.shuffle.compress" = "true" -) -PROPERTIES -( - "timeout" = "3600" -); - -``` - -创建导入的详细语法执行 ```HELP SPARK LOAD``` 查看语法帮助。这里主要介绍 Spark load 的创建导入语法中参数意义和注意事项。 - -#### Label - -导入任务的标识。每个导入任务,都有一个在单 database 内部唯一的 Label。具体规则与 `Broker Load` 一致。 - -#### 数据描述类参数 - -目前支持的数据源有CSV和hive table。其他规则与 `Broker Load` 一致。 - -#### 导入作业参数 - -导入作业参数主要指的是 Spark load 创建导入语句中的属于 ```opt_properties```部分的参数。导入作业参数是作用于整个导入作业的。规则与 `Broker Load` 一致。 - -#### Spark资源参数 - -Spark资源需要提前配置到 Doris系统中并且赋予用户USAGE_PRIV权限后才能使用 Spark load。 - -当用户有临时性的需求,比如增加任务使用的资源而修改 Spark configs,可以在这里设置,设置仅对本次任务生效,并不影响 Doris 集群中已有的配置。 - -```sql -WITH RESOURCE 'spark0' -( - "spark.driver.memory" = "1g", - "spark.executor.memory" = "3g" -) -``` -#### 数据源为hive表时的导入 -目前如果期望在导入流程中将hive表作为数据源,那么需要先新建一张类型为hive的外部表, -然后提交导入命令时指定外部表的表名即可。 - -#### 导入流程构建全局字典 -适用于doris表聚合列的数据类型为bitmap类型。 -在load命令中指定需要构建全局字典的字段即可,格式为:```doris字段名称=bitmap_dict(hive表字段名称)``` -需要注意的是目前只有在上游数据源为hive表时才支持全局字典的构建。 - -#### hive binary(bitmap)类型列的导入 -适用于doris表聚合列的数据类型为bitmap类型,且数据源hive表中对应列的数据类型为binary(通过FE中spark-dpp中的org.apache.doris.load.loadv2.dpp.BitmapValue类序列化)类型。 -无需构建全局字典,在load命令中指定相应字段即可,格式为:```doris字段名称=binary_bitmap(hive表字段名称)``` -同样,目前只有在上游数据源为hive表时才支持binary(bitmap)类型的数据导入。 - -### 查看导入 - -Spark load 导入方式同 Broker load 一样都是异步的,所以用户必须将创建导入的 Label 记录,并且在**查看导入命令中使用 Label 来查看导入结果**。查看导入命令在所有导入方式中是通用的,具体语法可执行 ```HELP SHOW LOAD``` 查看。 - -示例: - -``` -mysql> show load order by createtime desc limit 1\G -*************************** 1. 
row *************************** - JobId: 76391 - Label: label1 - State: FINISHED - Progress: ETL:100%; LOAD:100% - Type: SPARK - EtlInfo: unselected.rows=4; dpp.abnorm.ALL=15; dpp.norm.ALL=28133376 - TaskInfo: cluster:cluster0; timeout(s):10800; max_filter_ratio:5.0E-5 - ErrorMsg: N/A - CreateTime: 2019-07-27 11:46:42 - EtlStartTime: 2019-07-27 11:46:44 - EtlFinishTime: 2019-07-27 11:49:44 - LoadStartTime: 2019-07-27 11:49:44 -LoadFinishTime: 2019-07-27 11:50:16 - URL: http://1.1.1.1:8089/proxy/application_1586619723848_0035/ - JobDetails: {"ScannedRows":28133395,"TaskNumber":1,"FileNumber":1,"FileSize":200000} -``` - -返回结果集中参数意义可以参考 Broker load。不同点如下: - -+ State - - 导入任务当前所处的阶段。任务提交之后状态为 PENDING,提交 Spark ETL 之后状态变为 ETL,ETL 完成之后 FE 调度 BE 执行 push 操作状态变为 LOADING,push 完成并且版本生效后状态变为 FINISHED。 - - 导入任务的最终阶段有两个:CANCELLED 和 FINISHED,当 Load job 处于这两个阶段时导入完成。其中 CANCELLED 为导入失败,FINISHED 为导入成功。 - -+ Progress - - 导入任务的进度描述。分为两种进度:ETL 和 LOAD,对应了导入流程的两个阶段 ETL 和 LOADING。 - - LOAD 的进度范围为:0~100%。 - - ```LOAD 进度 = 当前已完成所有replica导入的tablet个数 / 本次导入任务的总tablet个数 * 100%``` - - **如果所有导入表均完成导入,此时 LOAD 的进度为 99%** 导入进入到最后生效阶段,整个导入完成后,LOAD 的进度才会改为 100%。 - - 导入进度并不是线性的。所以如果一段时间内进度没有变化,并不代表导入没有在执行。 - -+ Type - - 导入任务的类型。Spark load 为 SPARK。 - -+ CreateTime/EtlStartTime/EtlFinishTime/LoadStartTime/LoadFinishTime - - 这几个值分别代表导入创建的时间,ETL 阶段开始的时间,ETL 阶段完成的时间,LOADING 阶段开始的时间和整个导入任务完成的时间。 - -+ JobDetails - - 显示一些作业的详细运行状态,ETL 结束的时候更新。包括导入文件的个数、总大小(字节)、子任务个数、已处理的原始行数等。 - - ```{"ScannedRows":139264,"TaskNumber":1,"FileNumber":1,"FileSize":940754064}``` - -+ URL - - 可复制输入到浏览器,跳转至相应application的web界面 - -### 查看 spark launcher 提交日志 - -有时用户需要查看spark任务提交过程中产生的详细日志,日志默认保存在FE根目录下`log/spark_launcher_log`路径下,并以`spark_launcher_{load_job_id}_{label}.log`命名,日志会在此目录下保存一段时间,当FE元数据中的导入信息被清理时,相应的日志也会被清理,默认保存时间为3天。 - -### 取消导入 - -当 Spark load 作业状态不为 CANCELLED 或 FINISHED 时,可以被用户手动取消。取消时需要指定待取消导入任务的 Label 。取消导入命令语法可执行 ```HELP CANCEL LOAD```查看。 - - - -## 相关系统配置 - -### FE 配置 - -下面配置属于 Spark load 的系统级别配置,也就是作用于所有 Spark load 导入任务的配置。主要通过修改 ``` fe.conf```来调整配置值。 - -+ `enable_spark_load` - - 开启 Spark load 和创建 resource 功能。默认为 false,关闭此功能。 - -+ `spark_load_default_timeout_second` - - 任务默认超时时间为259200秒(3天)。 - -+ `spark_home_default_dir` - - spark客户端路径 (`fe/lib/spark2x`) 。 - -+ `spark_resource_path` - - 打包好的spark依赖文件路径(默认为空)。 - -+ `spark_launcher_log_dir` - - spark客户端的提交日志存放的目录(`fe/log/spark_launcher_log`)。 - -+ `yarn_client_path` - - yarn二进制可执行文件路径 (`fe/lib/yarn-client/hadoop/bin/yarn`) 。 - -+ `yarn_config_dir` - - yarn配置文件生成路径 (`fe/lib/yarn-config`) 。 - - -## 最佳实践 - -### 应用场景 - -使用 Spark load 最适合的场景就是原始数据在文件系统(HDFS)中,数据量在 几十 GB 到 TB 级别。小数据量还是建议使用 Stream load 或者 Broker load。 - - - -## 常见问题 - -* 使用Spark load时没有在spark客户端的spark-env.sh配置`HADOOP_CONF_DIR`环境变量。 - -如果`HADOOP_CONF_DIR`环境变量没有设置,会报 `When running with master 'yarn' either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the environment.` 错误。 - -* 使用Spark load时`spark_home_default_dir`配置项没有指定spark客户端根目录。 - -提交Spark job时用到spark-submit命令,如果`spark_home_default_dir`设置错误,会报 `Cannot run program "xxx/bin/spark-submit": error=2, No such file or directory` 错误。 - -* 使用Spark load时`spark_resource_path`配置项没有指向打包好的zip文件。 - -如果`spark_resource_path`没有设置正确,会报`File xxx/jars/spark-2x.zip does not exist` 错误。 - -* 使用Spark load时`yarn_client_path`配置项没有指定yarn的可执行文件。 - -如果`yarn_client_path`没有设置正确,会报`yarn client does not exist in path: xxx/yarn-client/hadoop/bin/yarn` 错误 - - - - - - diff --git a/docs/zh-CN/administrator-guide/load-data/stream-load-manual.md b/docs/zh-CN/administrator-guide/load-data/stream-load-manual.md deleted 
file mode 100644 index a5ba62acc4..0000000000 --- a/docs/zh-CN/administrator-guide/load-data/stream-load-manual.md +++ /dev/null @@ -1,415 +0,0 @@ ---- -{ - "title": "Stream load", - "language": "zh-CN" -} ---- - - - -# Stream load - -Stream load 是一个同步的导入方式,用户通过发送 HTTP 协议发送请求将本地文件或数据流导入到 Doris 中。Stream load 同步执行导入并返回导入结果。用户可直接通过请求的返回体判断本次导入是否成功。 - -Stream load 主要适用于导入本地文件,或通过程序导入数据流中的数据。 - -## 基本原理 - -下图展示了 Stream load 的主要流程,省略了一些导入细节。 - -``` - ^ + - | | - | | 1A. User submit load to FE - | | - | +--v-----------+ - | | FE | -5. Return result to user | +--+-----------+ - | | - | | 2. Redirect to BE - | | - | +--v-----------+ - +---+Coordinator BE| 1B. User submit load to BE - +-+-----+----+-+ - | | | - +-----+ | +-----+ - | | | 3. Distrbute data - | | | - +-v-+ +-v-+ +-v-+ - |BE | |BE | |BE | - +---+ +---+ +---+ -``` - -Stream load 中,Doris 会选定一个节点作为 Coordinator 节点。该节点负责接数据并分发数据到其他数据节点。 - -用户通过 HTTP 协议提交导入命令。如果提交到 FE,则 FE 会通过 HTTP redirect 指令将请求转发给某一个 BE。用户也可以直接提交导入命令给某一指定 BE。 - -导入的最终结果由 Coordinator BE 返回给用户。 - -## 支持数据格式 - -目前 Stream Load 支持两个数据格式:CSV(文本) 和 JSON - -## 基本操作 -### 创建导入 - -Stream load 通过 HTTP 协议提交和传输数据。这里通过 `curl` 命令展示如何提交导入。 - -用户也可以通过其他 HTTP client 进行操作。 - -``` -curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load - -Header 中支持属性见下面的 ‘导入任务参数’ 说明 -格式为: -H "key1:value1" -``` - -示例: - -``` -curl --location-trusted -u root -T date -H "label:123" http://abc.com:8030/api/test/date/_stream_load -``` -创建导入的详细语法帮助执行 ```HELP STREAM LOAD``` 查看, 下面主要介绍创建 Stream load 的部分参数意义。 - -#### 签名参数 - -+ user/passwd - - Stream load 由于创建导入的协议使用的是 HTTP 协议,通过 Basic access authentication 进行签名。Doris 系统会根据签名验证用户身份和导入权限。 - -#### 导入任务参数 - -Stream load 由于使用的是 HTTP 协议,所以所有导入任务有关的参数均设置在 Header 中。下面主要介绍了 Stream load 导入任务参数的部分参数意义。 - -+ label - - 导入任务的标识。每个导入任务,都有一个在单 database 内部唯一的 label。label 是用户在导入命令中自定义的名称。通过这个 label,用户可以查看对应导入任务的执行情况。 - - label 的另一个作用,是防止用户重复导入相同的数据。**强烈推荐用户同一批次数据使用相同的 label。这样同一批次数据的重复请求只会被接受一次,保证了 At-Most-Once** - - 当 label 对应的导入作业状态为 CANCELLED 时,该 label 可以再次被使用。 - -+ column_separator - - 用于指定导入文件中的列分隔符,默认为\t。如果是不可见字符,则需要加\x作为前缀,使用十六进制来表示分隔符。 - - 如hive文件的分隔符\x01,需要指定为-H "column_separator:\x01"。 - - 可以使用多个字符的组合作为列分隔符。 - -+ line_delimiter - - 用于指定导入文件中的换行符,默认为\n。 - - 可以使用做多个字符的组合作为换行符。 - -+ max\_filter\_ratio - - 导入任务的最大容忍率,默认为0容忍,取值范围是0~1。当导入的错误率超过该值,则导入失败。 - - 如果用户希望忽略错误的行,可以通过设置这个参数大于 0,来保证导入可以成功。 - - 计算公式为: - - ``` (dpp.abnorm.ALL / (dpp.abnorm.ALL + dpp.norm.ALL ) ) > max_filter_ratio ``` - - ```dpp.abnorm.ALL``` 表示数据质量不合格的行数。如类型不匹配,列数不匹配,长度不匹配等等。 - - ```dpp.norm.ALL``` 指的是导入过程中正确数据的条数。可以通过 ```SHOW LOAD``` 命令查询导入任务的正确数据量。 - - 原始文件的行数 = `dpp.abnorm.ALL + dpp.norm.ALL` - -+ where - - 导入任务指定的过滤条件。Stream load 支持对原始数据指定 where 语句进行过滤。被过滤的数据将不会被导入,也不会参与 filter ratio 的计算,但会被计入```num_rows_unselected```。 - -+ partition - - 待导入表的 Partition 信息,如果待导入数据不属于指定的 Partition 则不会被导入。这些数据将计入 ```dpp.abnorm.ALL ``` - -+ columns - - 待导入数据的函数变换配置,目前 Stream load 支持的函数变换方法包含列的顺序变化以及表达式变换,其中表达式变换的方法与查询语句的一致。 - - ``` - 列顺序变换例子:原始数据有三列(src_c1,src_c2,src_c3), 目前doris表也有三列(dst_c1,dst_c2,dst_c3) - - 如果原始表的src_c1列对应目标表dst_c1列,原始表的src_c2列对应目标表dst_c2列,原始表的src_c3列对应目标表dst_c3列,则写法如下: - columns: dst_c1, dst_c2, dst_c3 - - 如果原始表的src_c1列对应目标表dst_c2列,原始表的src_c2列对应目标表dst_c3列,原始表的src_c3列对应目标表dst_c1列,则写法如下: - columns: dst_c2, dst_c3, dst_c1 - - 表达式变换例子:原始文件有两列,目标表也有两列(c1,c2)但是原始文件的两列均需要经过函数变换才能对应目标表的两列,则写法如下: - columns: tmp_c1, tmp_c2, c1 = year(tmp_c1), c2 = month(tmp_c2) - 其中 tmp_*是一个占位符,代表的是原始文件中的两个原始列。 - ``` - -+ exec\_mem\_limit - - 导入内存限制。默认为 2GB,单位为字节。 
- -+ strict\_mode - - Stream load 导入可以开启 strict mode 模式。开启方式为在 HEADER 中声明 ```strict_mode=true``` 。默认的 strict mode 为关闭。 - - strict mode 模式的意思是:对于导入过程中的列类型转换进行严格过滤。严格过滤的策略如下: - - 1. 对于列类型转换来说,如果 strict mode 为true,则错误的数据将被 filter。这里的错误数据是指:原始数据并不为空值,在参与列类型转换后结果为空值的这一类数据。 - - 2. 对于导入的某列由函数变换生成时,strict mode 对其不产生影响。 - - 3. 对于导入的某列类型包含范围限制的,如果原始数据能正常通过类型转换,但无法通过范围限制的,strict mode 对其也不产生影响。例如:如果类型是 decimal(1,0), 原始数据为 10,则属于可以通过类型转换但不在列声明的范围内。这种数据 strict 对其不产生影响。 -+ merge\_type - 数据的合并类型,一共支持三种类型APPEND、DELETE、MERGE 其中,APPEND是默认值,表示这批数据全部需要追加到现有数据中,DELETE 表示删除与这批数据key相同的所有行,MERGE 语义 需要与delete 条件联合使用,表示满足delete 条件的数据按照DELETE 语义处理其余的按照APPEND 语义处理 - -+ two\_phase\_commit - - Stream load 导入可以开启两阶段事务提交模式。开启方式为在 HEADER 中声明 ```two_phase_commit=true``` 。默认的两阶段批量事务提交为关闭。 - 两阶段批量事务提交模式的意思是:Stream load过程中,数据写入完成即会返回信息给用户,此时数据不可见,事务状态为PRECOMMITTED,用户手动触发commit操作之后,数据才可见。 - - 1. 用户可以调用如下接口对stream load事务触发commit操作: - ``` - curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:commit" http://fe_host:http_port/api/{db}/_stream_load_2pc - ``` - 或 - ``` - curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:commit" http://be_host:webserver_port/api/{db}/_stream_load_2pc - ``` - 2. 用户可以调用如下接口对stream load事务触发abort操作: - ``` - curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:abort" http://fe_host:http_port/api/{db}/_stream_load_2pc - ``` - 或 - ``` - curl -X PUT --location-trusted -u user:passwd -H "txn_id:txnId" -H "txn_operation:abort" http://be_host:webserver_port/api/{db}/_stream_load_2pc - ``` - -#### strict mode 与 source data 的导入关系 - -这里以列类型为 TinyInt 来举例 - ->注:当表中的列允许导入空值时 - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|---------| -|空值 | \N | N/A | true or false | NULL| -|not null | aaa or 2000 | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 | 1 | true or false | correct data| - -这里以列类型为 Decimal(1,0) 举例 - ->注:当表中的列允许导入空值时 - -|source data | source data example | string to int | strict_mode | result| -|------------|---------------------|-----------------|--------------------|--------| -|空值 | \N | N/A | true or false | NULL| -|not null | aaa | NULL | true | invalid data(filtered)| -|not null | aaa | NULL | false | NULL| -|not null | 1 or 10 | 1 | true or false | correct data| - -> 注意:10 虽然是一个超过范围的值,但是因为其类型符合 decimal的要求,所以 strict mode对其不产生影响。10 最后会在其他 ETL 处理流程中被过滤。但不会被 strict mode 过滤。 - - -### 返回结果 - -由于 Stream load 是一种同步的导入方式,所以导入的结果会通过创建导入的返回值直接返回给用户。 - -示例: - -``` -{ - "TxnId": 1003, - "Label": "b6f3bc78-0d2c-45d9-9e4c-faa0a0149bee", - "Status": "Success", - "ExistingJobStatus": "FINISHED", // optional - "Message": "OK", - "NumberTotalRows": 1000000, - "NumberLoadedRows": 1000000, - "NumberFilteredRows": 1, - "NumberUnselectedRows": 0, - "LoadBytes": 40888898, - "LoadTimeMs": 2144, - "BeginTxnTimeMs": 1, - "StreamLoadPutTimeMs": 2, - "ReadDataTimeMs": 325, - "WriteDataTimeMs": 1933, - "CommitAndPublishTimeMs": 106, - "ErrorURL": "http://192.168.1.1:8042/api/_load_error_log?file=__shard_0/error_log_insert_stmt_db18266d4d9b4ee5-abb00ddd64bdf005_db18266d4d9b4ee5_abb00ddd64bdf005" -} -``` - -下面主要解释了 Stream load 导入结果参数: - -+ TxnId:导入的事务ID。用户可不感知。 - -+ Label:导入 Label。由用户指定或系统自动生成。 - -+ Status:导入完成状态。 - - "Success":表示导入成功。 - - "Publish Timeout":该状态也表示导入已经完成,只是数据可能会延迟可见,无需重试。 - - "Label Already Exists":Label 重复,需更换 Label。 - - "Fail":导入失败。 - -+ ExistingJobStatus:已存在的 Label 对应的导入作业的状态。 - 
- 这个字段只有在当 Status 为 "Label Already Exists" 时才会显示。用户可以通过这个状态,知晓已存在 Label 对应的导入作业的状态。"RUNNING" 表示作业还在执行,"FINISHED" 表示作业成功。 - -+ Message:导入错误信息。 - -+ NumberTotalRows:导入总处理的行数。 - -+ NumberLoadedRows:成功导入的行数。 - -+ NumberFilteredRows:数据质量不合格的行数。 - -+ NumberUnselectedRows:被 where 条件过滤的行数。 - -+ LoadBytes:导入的字节数。 - -+ LoadTimeMs:导入完成时间。单位毫秒。 - -+ BeginTxnTimeMs:向Fe请求开始一个事务所花费的时间,单位毫秒。 - -+ StreamLoadPutTimeMs:向Fe请求获取导入数据执行计划所花费的时间,单位毫秒。 - -+ ReadDataTimeMs:读取数据所花费的时间,单位毫秒。 - -+ WriteDataTimeMs:执行写入数据操作所花费的时间,单位毫秒。 - -+ CommitAndPublishTimeMs:向Fe请求提交并且发布事务所花费的时间,单位毫秒。 - -+ ErrorURL:如果有数据质量问题,通过访问这个 URL 查看具体错误行。 - -> 注意:由于 Stream load 是同步的导入方式,所以并不会在 Doris 系统中记录导入信息,用户无法异步的通过查看导入命令看到 Stream load。使用时需监听创建导入请求的返回值获取导入结果。 - -### 取消导入 - -用户无法手动取消 Stream load,Stream load 在超时或者导入错误后会被系统自动取消。 - -## 相关系统配置 - -### FE 配置 - -+ stream\_load\_default\_timeout\_second - - 导入任务的超时时间(以秒为单位),导入任务在设定的 timeout 时间内未完成则会被系统取消,变成 CANCELLED。 - - 默认的 timeout 时间为 600 秒。如果导入的源文件无法在规定时间内完成导入,用户可以在 stream load 请求中设置单独的超时时间。 - - 或者调整 FE 的参数```stream_load_default_timeout_second``` 来设置全局的默认超时时间。 - -### BE 配置 - -+ streaming\_load\_max\_mb - - Stream load 的最大导入大小,默认为 10G,单位是 MB。如果用户的原始文件超过这个值,则需要调整 BE 的参数 ```streaming_load_max_mb```。 - -## 最佳实践 - -### 应用场景 - -使用 Stream load 的最合适场景就是原始文件在内存中,或者在磁盘中。其次,由于 Stream load 是一种同步的导入方式,所以用户如果希望用同步方式获取导入结果,也可以使用这种导入。 - -### 数据量 - -由于 Stream load 的原理是由 BE 发起的导入并分发数据,建议的导入数据量在 1G 到 10G 之间。由于默认的最大 Stream load 导入数据量为 10G,所以如果要导入超过 10G 的文件需要修改 BE 的配置 ```streaming_load_max_mb``` - -``` -比如:待导入文件大小为15G -修改 BE 配置 streaming_load_max_mb 为 16000 即可。 -``` - -Stream load 的默认超时为 300秒,按照 Doris 目前最大的导入限速来看,约超过 3G 的文件就需要修改导入任务默认超时时间了。 - -``` -导入任务超时时间 = 导入数据量 / 10M/s (具体的平均导入速度需要用户根据自己的集群情况计算) -例如:导入一个 10G 的文件 -timeout = 1000s 等于 10G / 10M/s -``` - -### 完整例子 -数据情况: 数据在发送导入请求端的本地磁盘路径 /home/store_sales 中,导入的数据量约为 15G,希望导入到数据库 bj_sales 的表 store_sales 中。 - -集群情况:Stream load 的并发数不受集群大小影响。 - -+ step1: 导入文件大小是否超过默认的最大导入大小10G - - ``` - 修改 BE conf - streaming_load_max_mb = 16000 - ``` -+ step2: 计算大概的导入时间是否超过默认 timeout 值 - - ``` - 导入时间 ≈ 15000 / 10 = 1500s - 超过了默认的 timeout 时间,需要修改 FE 的配置 - stream_load_default_timeout_second = 1500 - ``` - -+ step3:创建导入任务 - - ``` - curl --location-trusted -u user:password -T /home/store_sales -H "label:abc" http://abc.com:8000/api/bj_sales/store_sales/_stream_load - ``` - -## 常见问题 - -* Label Already Exists - - Stream load 的 Label 重复排查步骤如下: - - 1. 是否和其他导入方式已经存在的导入 Label 冲突: - - 由于 Doris 系统中导入的 Label 不区分导入方式,所以存在其他导入方式使用了相同 Label 的问题。 - - 通过 ```SHOW LOAD WHERE LABEL = “xxx”```,其中 xxx 为重复的 Label 字符串,查看是否已经存在一个 FINISHED 导入的 Label 和用户申请创建的 Label 相同。 - - 2. 是否 Stream load 同一个作业被重复提交了 - - 由于 Stream load 是 HTTP 协议提交创建导入任务,一般各个语言的 HTTP Client 均会自带请求重试逻辑。Doris 系统在接受到第一个请求后,已经开始操作 Stream load,但是由于没有及时返回给 Client 端结果, Client 端会发生再次重试创建请求的情况。这时候 Doris 系统由于已经在操作第一个请求,所以第二个请求已经就会被报 Label Already Exists 的情况。 - - 排查上述可能的方法:使用 Label 搜索 FE Master 的日志,看是否存在同一个 Label 出现了两次 ```redirect load action to destination= ``` 的情况。如果有就说明,请求被 Client 端重复提交了。 - - 建议用户根据当前请求的数据量,计算出大致导入的时间,并根据导入超时时间,将Client 端的请求超时间改成大于导入超时时间的值,避免请求被 Client 端多次提交。 - - 3. 
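To make the first two label checks above concrete, the sketch below looks up the conflicting label in the load history and raises the global Stream Load timeout. `demo_db` and `test_label_123` are placeholder names, and it is assumed that `stream_load_default_timeout_second` is runtime-mutable in your version; otherwise change it in `fe.conf`.

```sql
-- Was the label already used by a FINISHED job of another load type in this database?
SHOW LOAD FROM demo_db WHERE LABEL = "test_label_123";

-- Raise the global default timeout (seconds) if large files routinely exceed the 600s default;
-- a per-request timeout can also be passed in the Stream Load HTTP header.
ADMIN SET FRONTEND CONFIG ("stream_load_default_timeout_second" = "1500");
```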
Connection reset 异常 - - 在社区版 0.14.0 及之前的版本在启用Http V2之后出现connection reset异常,因为Web 容器内置的是tomcat,Tomcat 在 307 (Temporary Redirect) 是有坑的,对这个协议实现是有问题的,所有在使用Stream load 导入大数据量的情况下会出现connect reset异常,这个是因为tomcat在做307跳转之前就开始了数据传输,这样就造成了BE收到的数据请求的时候缺少了认证信息,之后将内置容器改成了Jetty解决了这个问题,如果你遇到这个问题,请升级你的Doris或者禁用Http V2(`enable_http_server_v2=false`)。 - - 升级以后同时升级你程序的http client 版本到 `4.5.13`,在你的pom.xml文件中引入下面的依赖 - - ```xml - - org.apache.httpcomponents - httpclient - 4.5.13 - - ``` - - - - - diff --git a/docs/zh-CN/administrator-guide/materialized_view.md b/docs/zh-CN/administrator-guide/materialized_view.md deleted file mode 100644 index 1e0a169a07..0000000000 --- a/docs/zh-CN/administrator-guide/materialized_view.md +++ /dev/null @@ -1,488 +0,0 @@ ---- -{ - "title": "物化视图", - "language": "zh-CN" -} ---- - - - -# 物化视图 - -物化视图是将预先计算(根据定义好的 SELECT 语句)好的数据集,存储在 Doris 中的一个特殊的表。 - -物化视图的出现主要是为了满足用户,既能对原始明细数据的任意维度分析,也能快速的对固定维度进行分析查询。 - -## 适用场景 - -+ 分析需求覆盖明细数据查询以及固定维度查询两方面。 -+ 查询仅涉及表中的很小一部分列或行。 -+ 查询包含一些耗时处理操作,比如:时间很久的聚合操作等。 -+ 查询需要匹配不同前缀索引。 - -## 优势 - -+ 对于那些经常重复的使用相同的子查询结果的查询性能大幅提升。 -+ Doris自动维护物化视图的数据,无论是新的导入,还是删除操作都能保证base 表和物化视图表的数据一致性。无需任何额外的人工维护成本。 -+ 查询时,会自动匹配到最优物化视图,并直接从物化视图中读取数据。 - -*自动维护物化视图的数据会造成一些维护开销,会在后面的物化视图的局限性中展开说明。* - -## 物化视图 VS Rollup - -在没有物化视图功能之前,用户一般都是使用 Rollup 功能通过预聚合方式提升查询效率的。但是 Rollup 具有一定的局限性,他不能基于明细模型做预聚合。 - -物化视图则在覆盖了 Rollup 的功能的同时,还能支持更丰富的聚合函数。所以物化视图其实是 Rollup 的一个超集。 - -也就是说,之前 `ALTER TABLE ADD ROLLUP` 语法支持的功能现在均可以通过 `CREATE MATERIALIZED VIEW` 实现。 - -## 使用物化视图 - -Doris 系统提供了一整套对物化视图的 DDL 语法,包括创建,查看,删除。DDL 的语法和 PostgreSQL, Oracle都是一致的。 - -### 创建物化视图 - -这里首先你要根据你的查询语句的特点来决定创建一个什么样的物化视图。这里并不是说你的物化视图定义和你的某个查询语句一模一样就最好。这里有两个原则: - -1. 从查询语句中**抽象**出,多个查询共有的分组和聚合方式作为物化视图的定义。 -2. 不需要给所有维度组合都创建物化视图。 - -首先第一个点,一个物化视图如果抽象出来,并且多个查询都可以匹配到这张物化视图。这种物化视图效果最好。因为物化视图的维护本身也需要消耗资源。 - -如果物化视图只和某个特殊的查询很贴合,而其他查询均用不到这个物化视图。则会导致这张物化视图的性价比不高,既占用了集群的存储资源,还不能为更多的查询服务。 - -所以用户需要结合自己的查询语句,以及数据维度信息去抽象出一些物化视图的定义。 - -第二点就是,在实际的分析查询中,并不会覆盖到所有的维度分析。所以给常用的维度组合创建物化视图即可,从而到达一个空间和时间上的平衡。 - -创建物化视图是一个异步的操作,也就是说用户成功提交创建任务后,Doris 会在后台对存量的数据进行计算,直到创建成功。 - -具体的语法可以通过 Mysql 协议链接 Doris 并输入下面命令查看: - -``` -HELP CREATE MATERIALIZED VIEW -``` - -### 支持聚合函数 - -目前物化视图创建语句支持的聚合函数有: - -+ SUM, MIN, MAX (Version 0.12) -+ COUNT, BITMAP\_UNION, HLL\_UNION (Version 0.13) - -+ BITMAP\_UNION 的形式必须为:`BITMAP_UNION(TO_BITMAP(COLUMN))` column 列的类型只能是整数(largeint也不支持), 或者 `BITMAP_UNION(COLUMN)` 且 base 表为 AGG 模型。 -+ HLL\_UNION 的形式必须为:`HLL_UNION(HLL_HASH(COLUMN))` column 列的类型不能是 DECIMAL , 或者 `HLL_UNION(COLUMN)` 且 base 表为 AGG 模型。 - -### 更新策略 - -为保证物化视图表和 Base 表的数据一致性, Doris 会将导入,删除等对 base 表的操作都同步到物化视图表中。并且通过增量更新的方式来提升更新效率。通过事务方式来保证原子性。 - -比如如果用户通过 INSERT 命令插入数据到 base 表中,则这条数据会同步插入到物化视图中。当 base 表和物化视图表均写入成功后,INSERT 命令才会成功返回。 - -### 查询自动匹配 - -物化视图创建成功后,用户的查询不需要发生任何改变,也就是还是查询的 base 表。Doris 会根据当前查询的语句去自动选择一个最优的物化视图,从物化视图中读取数据并计算。 - -用户可以通过 EXPLAIN 命令来检查当前查询是否使用了物化视图。 - -物化视图中的聚合和查询中聚合的匹配关系: - -| 物化视图聚合 | 查询中聚合 | -| ---------- | -------- | -| sum | sum | -| min | min | -| max | max | -| count | count | -| bitmap\_union | bitmap\_union, bitmap\_union\_count, count(distinct) | -| hll\_union | hll\_raw\_agg, hll\_union\_agg, ndv, approx\_count\_distinct | - -其中 bitmap 和 hll 的聚合函数在查询匹配到物化视图后,查询的聚合算子会根据物化视图的表结构进行一个改写。详细见实例2。 - -### 查询物化视图 - -查看当前表都有哪些物化视图,以及他们的表结构都是什么样的。通过下面命令: - -``` -MySQL [test]> desc mv_test all; -+-----------+---------------+-----------------+----------+------+-------+---------+--------------+ -| IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra | 
-+-----------+---------------+-----------------+----------+------+-------+---------+--------------+ -| mv_test | DUP_KEYS | k1 | INT | Yes | true | NULL | | -| | | k2 | BIGINT | Yes | true | NULL | | -| | | k3 | LARGEINT | Yes | true | NULL | | -| | | k4 | SMALLINT | Yes | false | NULL | NONE | -| | | | | | | | | -| mv_2 | AGG_KEYS | k2 | BIGINT | Yes | true | NULL | | -| | | k4 | SMALLINT | Yes | false | NULL | MIN | -| | | k1 | INT | Yes | false | NULL | MAX | -| | | | | | | | | -| mv_3 | AGG_KEYS | k1 | INT | Yes | true | NULL | | -| | | to_bitmap(`k2`) | BITMAP | No | false | | BITMAP_UNION | -| | | | | | | | | -| mv_1 | AGG_KEYS | k4 | SMALLINT | Yes | true | NULL | | -| | | k1 | BIGINT | Yes | false | NULL | SUM | -| | | k3 | LARGEINT | Yes | false | NULL | SUM | -| | | k2 | BIGINT | Yes | false | NULL | MIN | -+-----------+---------------+-----------------+----------+------+-------+---------+--------------+ -``` - -可以看到当前 `mv_test` 表一共有三张物化视图:mv\_1, mv\_2 和 mv\_3,以及他们的表结构。 - -### 删除物化视图 - -如果用户不再需要物化视图,则可以通过命令删除物化视图。 - -具体的语法可以通过 Mysql 协议链接 Doris 输入下面命令查看: - -``` -HELP DROP MATERIALIZED VIEW -``` - -## 最佳实践1 - -使用物化视图一般分为以下几个步骤: - -1. 创建物化视图 -2. 异步检查物化视图是否构建完成 -3. 查询并自动匹配物化视图 - -**首先是第一步:创建物化视图** - -假设用户有一张销售记录明细表,存储了每个交易的交易id,销售员,售卖门店,销售时间,以及金额。建表语句为: - -``` -create table sales_records(record_id int, seller_id int, store_id int, sale_date date, sale_amt bigint) distributed by hash(record_id) properties("replication_num" = "1"); -``` -这张 `sales_records` 的表结构如下: - -``` -MySQL [test]> desc sales_records; -+-----------+--------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+-----------+--------+------+-------+---------+-------+ -| record_id | INT | Yes | true | NULL | | -| seller_id | INT | Yes | true | NULL | | -| store_id | INT | Yes | true | NULL | | -| sale_date | DATE | Yes | false | NULL | NONE | -| sale_amt | BIGINT | Yes | false | NULL | NONE | -+-----------+--------+------+-------+---------+-------+ -``` - -这时候如果用户经常对不同门店的销售量进行一个分析查询,则可以给这个 `sales_records` 表创建一张以售卖门店分组,对相同售卖门店的销售额求和的一个物化视图。创建语句如下: - -``` -MySQL [test]> create materialized view store_amt as select store_id, sum(sale_amt) from sales_records group by store_id; -``` - -后端返回下图,则说明创建物化视图任务提交成功。 - -``` -Query OK, 0 rows affected (0.012 sec) -``` - -**第二步:检查物化视图是否构建完成** - -由于创建物化视图是一个异步的操作,用户在提交完创建物化视图任务后,需要异步的通过命令检查物化视图是否构建完成。命令如下: - -``` -SHOW ALTER TABLE ROLLUP FROM db_name; (Version 0.12) -SHOW ALTER TABLE MATERIALIZED VIEW FROM db_name; (Version 0.13) -``` - -这个命令中 `db_name` 是一个参数, 你需要替换成自己真实的 db 名称。命令的结果是显示这个 db 的所有创建物化视图的任务。结果如下: - -``` -+-------+---------------+---------------------+---------------------+---------------+-----------------+----------+---------------+-----------+-------------------------------------------------------------------------------------------------------------------------+----------+---------+ -| JobId | TableName | CreateTime | FinishedTime | BaseIndexName | RollupIndexName | RollupId | TransactionId | State | Msg | Progress | Timeout | -+-------+---------------+---------------------+---------------------+---------------+-----------------+----------+---------------+-----------+-------------------------------------------------------------------------------------------------------------------------+----------+---------+ -| 22036 | sales_records | 2020-07-30 20:04:28 | 2020-07-30 20:04:57 | sales_records | store_amt | 22037 | 5008 | FINISHED | | NULL | 86400 | 
-+-------+---------------+---------------------+---------------------+---------------+-----------------+----------+---------------+-----------+-------------------------------------------------------------------------------------------------------------------------+----------+---------+ -``` - -其中 TableName 指的是物化视图的数据来自于哪个表,RollupIndexName 指的是物化视图的名称叫什么。其中比较重要的指标是 State。 - -当创建物化视图任务的 State 已经变成 FINISHED 后,就说明这个物化视图已经创建成功了。这就意味着,查询的时候有可能自动匹配到这张物化视图了。 - -**第三步:查询** - -当创建完成物化视图后,用户再查询不同门店的销售量时,就会直接从刚才创建的物化视图 `store_amt` 中读取聚合好的数据。达到提升查询效率的效果。 - -用户的查询依旧指定查询 `sales_records` 表,比如: - -``` -SELECT store_id, sum(sale_amt) FROM sales_records GROUP BY store_id; -``` - -上面查询就能自动匹配到 `store_amt`。用户可以通过下面命令,检验当前查询是否匹配到了合适的物化视图。 - -``` -EXPLAIN SELECT store_id, sum(sale_amt) FROM sales_records GROUP BY store_id; -+-----------------------------------------------------------------------------+ -| Explain String | -+-----------------------------------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS: `store_id` | sum(`sale_amt`) | -| PARTITION: UNPARTITIONED | -| | -| RESULT SINK | -| | -| 4:EXCHANGE | -| | -| PLAN FRAGMENT 1 | -| OUTPUT EXPRS: | -| PARTITION: HASH_PARTITIONED: `store_id` | -| | -| STREAM DATA SINK | -| EXCHANGE ID: 04 | -| UNPARTITIONED | -| | -| 3:AGGREGATE (merge finalize) | -| | output: sum( sum(`sale_amt`)) | -| | group by: `store_id` | -| | | -| 2:EXCHANGE | -| | -| PLAN FRAGMENT 2 | -| OUTPUT EXPRS: | -| PARTITION: RANDOM | -| | -| STREAM DATA SINK | -| EXCHANGE ID: 02 | -| HASH_PARTITIONED: `store_id` | -| | -| 1:AGGREGATE (update serialize) | -| | STREAMING | -| | output: sum(`sale_amt`) | -| | group by: `store_id` | -| | | -| 0:OlapScanNode | -| TABLE: sales_records | -| PREAGGREGATION: ON | -| partitions=1/1 | -| rollup: store_amt | -| tabletRatio=10/10 | -| tabletList=22038,22040,22042,22044,22046,22048,22050,22052,22054,22056 | -| cardinality=0 | -| avgRowSize=0.0 | -| numNodes=1 | -+-----------------------------------------------------------------------------+ -45 rows in set (0.006 sec) -``` - -其中最重要的就是 OlapScanNode 中的 rollup 属性。可以看到当前查询的 rollup 显示的是 `store_amt`。也就是说查询已经正确匹配到物化视图 `store_amt`, 并直接从物化视图中读取数据了。 - -## 最佳实践2 PV,UV - -业务场景: 计算广告的 UV,PV - -假设用户的原始广告点击数据存储在 Doris,那么针对广告 PV, UV 查询就可以通过创建 `bitmap_union` 的物化视图来提升查询速度。 - -通过下面语句首先创建一个存储广告点击数据明细的表,包含每条点击的点击事件,点击的是什么广告,通过什么渠道点击,以及点击的用户是谁。 - -``` -MySQL [test]> create table advertiser_view_record(time date, advertiser varchar(10), channel varchar(10), user_id int) distributed by hash(time) properties("replication_num" = "1"); -Query O -K, 0 rows affected (0.014 sec) -``` -原始的广告点击数据表结构为: - -``` -MySQL [test]> desc advertiser_view_record; -+------------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+------------+-------------+------+-------+---------+-------+ -| time | DATE | Yes | true | NULL | | -| advertiser | VARCHAR(10) | Yes | true | NULL | | -| channel | VARCHAR(10) | Yes | false | NULL | NONE | -| user_id | INT | Yes | false | NULL | NONE | -+------------+-------------+------+-------+---------+-------+ -4 rows in set (0.001 sec) -``` - -1. 
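One lifecycle step the examples above do not spell out is removal: when a materialized view no longer earns its maintenance cost, it can be dropped without touching the base table. The sketch below is based on `HELP DROP MATERIALIZED VIEW` and reuses the names from best practice 1; confirm the exact syntax in your version.

```sql
-- Drop the materialized view created in best practice 1; sales_records itself is unaffected.
DROP MATERIALIZED VIEW store_amt ON sales_records;

-- Verify: only the base index of the table should remain.
DESC sales_records ALL;
```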
创建物化视图 - - 由于用户想要查询的是广告的 UV 值,也就是需要对相同广告的用户进行一个精确去重,则查询一般为: - - ``` - SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel; - ``` - - 针对这种求 UV 的场景,我们就可以创建一个带 `bitmap_union` 的物化视图从而达到一个预先精确去重的效果。 - - 在 Doris 中,`count(distinct)` 聚合的结果和 `bitmap_union_count`聚合的结果是完全一致的。而`bitmap_union_count` 等于 `bitmap_union` 的结果求 count, 所以如果查询中**涉及到 `count(distinct)` 则通过创建带 `bitmap_union` 聚合的物化视图方可加快查询**。 - - 针对这个 case,则可以创建一个根据广告和渠道分组,对 `user_id` 进行精确去重的物化视图。 - - ``` - MySQL [test]> create materialized view advertiser_uv as select advertiser, channel, bitmap_union(to_bitmap(user_id)) from advertiser_view_record group by advertiser, channel; - Query OK, 0 rows affected (0.012 sec) - ``` - - *注意:因为本身 user\_id 是一个 INT 类型,所以在 Doris 中需要先将字段通过函数 `to_bitmap` 转换为 bitmap 类型然后才可以进行 `bitmap_union` 聚合。* - - 创建完成后, 广告点击明细表和物化视图表的表结构如下: - - ``` - MySQL [test]> desc advertiser_view_record all; - +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+ - | IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra | - +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+ - | advertiser_view_record | DUP_KEYS | time | DATE | Yes | true | NULL | | - | | | advertiser | VARCHAR(10) | Yes | true | NULL | | - | | | channel | VARCHAR(10) | Yes | false | NULL | NONE | - | | | user_id | INT | Yes | false | NULL | NONE | - | | | | | | | | | - | advertiser_uv | AGG_KEYS | advertiser | VARCHAR(10) | Yes | true | NULL | | - | | | channel | VARCHAR(10) | Yes | true | NULL | | - | | | to_bitmap(`user_id`) | BITMAP | No | false | | BITMAP_UNION | - +------------------------+---------------+----------------------+-------------+------+-------+---------+--------------+ - ``` - -2. 
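As a quick smoke test of the view defined above, one can insert a few rows and run the UV query against the base table; the rewrite described in the next step makes it read the pre-aggregated bitmap data. The sample values below are made up purely for illustration.

```sql
-- A handful of sample clicks (illustrative values only).
INSERT INTO advertiser_view_record VALUES
    ("2020-02-02", "ad_1", "app", 101),
    ("2020-02-02", "ad_1", "app", 102),
    ("2020-02-02", "ad_1", "web", 101);

-- Written against the base table; Doris rewrites it to bitmap_union_count on advertiser_uv.
SELECT advertiser, channel, count(DISTINCT user_id)
FROM advertiser_view_record
GROUP BY advertiser, channel;
```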
查询自动匹配 - - 当物化视图表创建完成后,查询广告 UV 时,Doris就会自动从刚才创建好的物化视图 `advertiser_uv` 中查询数据。比如原始的查询语句如下: - - ``` - SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel; - ``` - - 在选中物化视图后,实际的查询会转化为: - - ``` - SELECT advertiser, channel, bitmap_union_count(to_bitmap(user_id)) FROM advertiser_uv GROUP BY advertiser, channel; - ``` - - 通过 EXPLAIN 命令可以检验到 Doris 是否匹配到了物化视图: - - ``` - MySQL [test]> explain SELECT advertiser, channel, count(distinct user_id) FROM advertiser_view_record GROUP BY advertiser, channel; - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Explain String | - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | PLAN FRAGMENT 0 | - | OUTPUT EXPRS: `advertiser` | `channel` | bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`) | - | PARTITION: UNPARTITIONED | - | | - | RESULT SINK | - | | - | 4:EXCHANGE | - | | - | PLAN FRAGMENT 1 | - | OUTPUT EXPRS: | - | PARTITION: HASH_PARTITIONED: `advertiser`, `channel` | - | | - | STREAM DATA SINK | - | EXCHANGE ID: 04 | - | UNPARTITIONED | - | | - | 3:AGGREGATE (merge finalize) | - | | output: bitmap_union_count( bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`)) | - | | group by: `advertiser`, `channel` | - | | | - | 2:EXCHANGE | - | | - | PLAN FRAGMENT 2 | - | OUTPUT EXPRS: | - | PARTITION: RANDOM | - | | - | STREAM DATA SINK | - | EXCHANGE ID: 02 | - | HASH_PARTITIONED: `advertiser`, `channel` | - | | - | 1:AGGREGATE (update serialize) | - | | STREAMING | - | | output: bitmap_union_count(`default_cluster:test`.`advertiser_view_record`.`mv_bitmap_union_user_id`) | - | | group by: `advertiser`, `channel` | - | | | - | 0:OlapScanNode | - | TABLE: advertiser_view_record | - | PREAGGREGATION: ON | - | partitions=1/1 | - | rollup: advertiser_uv | - | tabletRatio=10/10 | - | tabletList=22084,22086,22088,22090,22092,22094,22096,22098,22100,22102 | - | cardinality=0 | - | avgRowSize=0.0 | - | numNodes=1 | - +-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - 45 rows in set (0.030 sec) - ``` - - 在 EXPLAIN 的结果中,首先可以看到 OlapScanNode 的 rollup 属性值为 advertiser_uv。也就是说,查询会直接扫描物化视图的数据。说明匹配成功。 - - 其次对于 `user_id` 字段求 `count(distinct)` 被改写为求 `bitmap_union_count(to_bitmap)`。也就是通过 bitmap 的方式来达到精确去重的效果。 - - -## 最佳实践3 - -业务场景:匹配更丰富的前缀索引 - -用户的原始表有 (k1, k2, k3) 三列。其中 k1, k2 为前缀索引列。这时候如果用户查询条件中包含 `where k1=1 and k2=2` 就能通过索引加速查询。 - -但是有些情况下,用户的过滤条件无法匹配到前缀索引,比如 `where k3=3`。则无法通过索引提升查询速度。 - -创建以 k3 作为第一列的物化视图就可以解决这个问题。 - -1. 
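The walkthrough that follows assumes a duplicate-key table roughly like the sketch below. The column types, bucket count and replica count are assumptions for illustration; the original text only states that the table has columns (k1, k2, k3) with k1 and k2 forming the prefix index.

```sql
-- k1, k2, k3 all belong to the duplicate key, so the prefix index is built on the leading columns k1, k2.
CREATE TABLE tableA
(
    k1 INT,
    k2 INT,
    k3 INT
)
DUPLICATE KEY (k1, k2, k3)
DISTRIBUTED BY HASH(k1) BUCKETS 10
PROPERTIES ("replication_num" = "1");
```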
创建物化视图 - - ``` - CREATE MATERIALIZED VIEW mv_1 as SELECT k3, k2, k1 FROM tableA ORDER BY k3; - ``` - - 通过上面语法创建完成后,物化视图中既保留了完整的明细数据,且物化视图的前缀索引为 k3 列。表结构如下: - - ``` - MySQL [test]> desc tableA all; - +-----------+---------------+-------+------+------+-------+---------+-------+ - | IndexName | IndexKeysType | Field | Type | Null | Key | Default | Extra | - +-----------+---------------+-------+------+------+-------+---------+-------+ - | tableA | DUP_KEYS | k1 | INT | Yes | true | NULL | | - | | | k2 | INT | Yes | true | NULL | | - | | | k3 | INT | Yes | true | NULL | | - | | | | | | | | | - | mv_1 | DUP_KEYS | k3 | INT | Yes | true | NULL | | - | | | k2 | INT | Yes | false | NULL | NONE | - | | | k1 | INT | Yes | false | NULL | NONE | - +-----------+---------------+-------+------+------+-------+---------+-------+ - ``` - -2. 查询匹配 - - 这时候如果用户的查询存在 k3 列的过滤条件是,比如: - - ``` - select k1, k2, k3 from table A where k3=3; - ``` - - 这时候查询就会直接从刚才创建的 mv_1 物化视图中读取数据。物化视图对 k3 是存在前缀索引的,查询效率也会提升。 - - -## 局限性 - -1. 物化视图的聚合函数的参数不支持表达式仅支持单列,比如: sum(a+b)不支持。 -2. 如果删除语句的条件列,在物化视图中不存在,则不能进行删除操作。如果一定要删除数据,则需要先将物化视图删除,然后方可删除数据。 -3. 单表上过多的物化视图会影响导入的效率:导入数据时,物化视图和 base 表数据是同步更新的,如果一张表的物化视图表超过10张,则有可能导致导入速度很慢。这就像单次导入需要同时导入10张表数据是一样的。 -4. 相同列,不同聚合函数,不能同时出现在一张物化视图中,比如:select sum(a), min(a) from table 不支持。 -5. 物化视图针对 Unique Key数据模型,只能改变列顺序,不能起到聚合的作用,所以在Unique Key模型上不能通过创建物化视图的方式对数据进行粗粒度聚合操作 - -## 异常错误 -1. DATA_QUALITY_ERR: "The data quality does not satisfy, please check your data" - 由于数据质量问题导致物化视图创建失败。 - 注意:bitmap类型仅支持正整型, 如果原始数据中存在负数,会导致物化视图创建失败 diff --git a/docs/zh-CN/administrator-guide/multi-tenant.md b/docs/zh-CN/administrator-guide/multi-tenant.md deleted file mode 100644 index fb1cac4d62..0000000000 --- a/docs/zh-CN/administrator-guide/multi-tenant.md +++ /dev/null @@ -1,222 +0,0 @@ ---- -{ - "title": "多租户和资源划分", - "language": "zh-CN" -} ---- - - - -# 多租户和资源划分 - -Doris 的多租户和资源隔离方案,主要目的是为了多用户在同一 Doris 集群内进行数据操作时,减少相互之间的干扰,能够将集群资源更合理的分配给各用户。 - -该方案主要分为两部分,一是集群内节点级别的资源组划分,二是针对单个查询的资源限制。 - -## Doris 中的节点 - -首先先简单介绍一下 Doris 的节点组成。一个 Doris 集群中有两类节点:Frontend(FE) 和 Backend(BE)。 - -FE 主要负责元数据管理、集群管理、用户请求的接入和查询计划的解析等工作。 - -BE 主要负责数据存储、查询计划的执行等工作。 - -FE 不参与用户数据的处理计算等工作,因此是一个资源消耗较低的节点。而 BE 负责所有的数据计算、任务处理,属于资源消耗型的节点。因此,本文所介绍的资源划分及资源限制方案,都是针对 BE 节点的。FE 节点因为资源消耗相对较低,并且还可以横向扩展,因此通常无需做资源上的隔离和限制,FE 节点由所有用户共享即可。 - -## 节点资源划分 - -节点资源划分,是指将一个 Doris 集群内的 BE 节点设置标签(Tag),标签相同的 BE 节点组成一个资源组(Resource Group)。资源组可以看作是数据存储和计算的一个管理单元。下面我们通过一个具体示例,来介绍资源组的使用方式。 - -1. 为 BE 节点设置标签 - - 假设当前 Doris 集群有 6 个 BE 节点。分别为 host[1-6]。在初始情况下,所有节点都属于一个默认资源组(Default)。 - - 我们可以使用以下命令将这6个节点划分成3个资源组:group_a、group_b、group_c: - - ```sql - alter system modify backend "host1:9050" set ("tag.location" = "group_a"); - alter system modify backend "host2:9050" set ("tag.location" = "group_a"); - alter system modify backend "host3:9050" set ("tag.location" = "group_b"); - alter system modify backend "host4:9050" set ("tag.location" = "group_b"); - alter system modify backend "host5:9050" set ("tag.location" = "group_c"); - alter system modify backend "host6:9050" set ("tag.location" = "group_c"); - ``` - - 这里我们将 `host[1-2]` 组成资源组 `group_a`,`host[3-4]` 组成资源组 `group_b`,`host[5-6]` 组成资源组 `group_c`。 - - > 注:一个 BE 只支持设置一个 Tag。 - -2. 
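After tagging the six nodes as above, it is worth double-checking the assignment before any data is distributed. A small sketch; depending on the version, the tag shows up in the Tag column of `SHOW BACKENDS` or in `SHOW PROC "/backends"`.

```sql
-- List all BE nodes and confirm host1..host6 carry group_a / group_b / group_c.
SHOW BACKENDS;

-- A mis-tagged node can simply be re-tagged; "default" restores the built-in tag.
ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("tag.location" = "default");
```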
按照资源组分配数据分布 - - 资源组划分好后。我们可以将用户数据的不同副本分布在不同资源组内。假设一张用户表 UserTable。我们希望在3个资源组内各存放一个副本,则可以通过如下建表语句实现: - - ```sql - create table UserTable - (k1 int, k2 int) - distributed by hash(k1) buckets 1 - properties( - "replication_allocation" - = - "tag.location.group_a:1, tag.location.group_b:1, tag.location.group_c:1" - ) - ``` - - 这样一来,表 UserTable 中的数据,将会以3副本的形式,分别存储在资源组 group_a、group_b、group_c所在的节点中。 - - 下图展示了当前的节点划分和数据分布: - - ``` - ┌────────────────────────────────────────────────────┐ - │ │ - │ ┌──────────────────┐ ┌──────────────────┐ │ - │ │ host1 │ │ host2 │ │ - │ │ ┌─────────────┐ │ │ │ │ - │ group_a │ │ replica1 │ │ │ │ │ - │ │ └─────────────┘ │ │ │ │ - │ │ │ │ │ │ - │ └──────────────────┘ └──────────────────┘ │ - │ │ - ├────────────────────────────────────────────────────┤ - ├────────────────────────────────────────────────────┤ - │ │ - │ ┌──────────────────┐ ┌──────────────────┐ │ - │ │ host3 │ │ host4 │ │ - │ │ │ │ ┌─────────────┐ │ │ - │ group_b │ │ │ │ replica2 │ │ │ - │ │ │ │ └─────────────┘ │ │ - │ │ │ │ │ │ - │ └──────────────────┘ └──────────────────┘ │ - │ │ - ├────────────────────────────────────────────────────┤ - ├────────────────────────────────────────────────────┤ - │ │ - │ ┌──────────────────┐ ┌──────────────────┐ │ - │ │ host5 │ │ host6 │ │ - │ │ │ │ ┌─────────────┐ │ │ - │ group_c │ │ │ │ replica3 │ │ │ - │ │ │ │ └─────────────┘ │ │ - │ │ │ │ │ │ - │ └──────────────────┘ └──────────────────┘ │ - │ │ - └────────────────────────────────────────────────────┘ - ``` - -3. 使用不同资源组进行数据查询 - - 在前两步执行完成后,我们就可以通过设置用户的资源使用权限,来限制某一用户的查询,只能使用指定资源组中的节点来执行。 - - 比如我们可以通过以下语句,限制 user1 只能使用 `group_a` 资源组中的节点进行数据查询,user2 只能使用 `group_b` 资源组,而 user3 可以同时使用 3 个资源组: - - ```sql - set property for 'user1' 'resource_tags.location' = 'group_a'; - set property for 'user2' 'resource_tags.location' = 'group_b'; - set property for 'user3' 'resource_tags.location' = 'group_a, group_b, group_c'; - ``` - - 设置完成后,user1 在发起对 UserTable 表的查询时,只会访问 `group_a` 资源组内节点上的数据副本,并且查询仅会使用 `group_a` 资源组内的节点计算资源。而 user3 的查询可以使用任意资源组内的副本和计算资源。 - - 这样,我们通过对节点的划分,以及对用户的资源使用限制,实现了不同用户查询上的物理资源隔离。更进一步,我们可以给不同的业务部门创建不同的用户,并限制每个用户使用不同的资源组。以避免不同业务部分之间使用资源干扰。比如集群内有一张业务表需要共享给所有9个业务部门使用,但是希望能够尽量避免不同部门之间的资源抢占。则我们可以为这张表创建3个副本,分别存储在3个资源组中。接下来,我们为9个业务部门创建9个用户,每3个用户限制使用一个资源组。这样,资源的竞争程度就由9降低到了3。 - - 另一方面,针对在线和离线任务的隔离。我们可以利用资源组的方式实现。比如我们可以将节点划分为 Online 和 Offline 两个资源组。表数据依然以3副本的方式存储,其中 2 个副本存放在 Online 资源组,1 个副本存放在 Offline 资源组。Online 资源组主要用于高并发低延迟的在线数据服务,而一些大查询或离线ETL操作,则可以使用 Offline 资源组中的节点执行。从而实现在统一集群内同时提供在线和离线服务的能力。 - -## 单查询资源限制 - -前面提到的资源组方法是节点级别的资源隔离和限制。而在资源组内,依然可能发生资源抢占问题。比如前文提到的将3个业务部门安排在同一资源组内。虽然降低了资源竞争程度,但是这3个部门的查询依然有可能相互影响。 - -因此,除了资源组方案外,Doris 还提供了对单查询的资源限制功能。 - -目前 Doris 对单查询的资源限制主要分为 CPU 和 内存限制两方面。 - -1. 内存限制 - - Doris 可以限制一个查询被允许使用的最大内存开销。以保证集群的内存资源不会被某一个查询全部占用。我们可以通过以下方式设置内存限制: - - ``` - // 设置会话变量 exec_mem_limit。则之后该会话内(连接内)的所有查询都使用这个内存限制。 - set exec_mem_limit=1G; - // 设置全局变量 exec_mem_limit。则之后所有新会话(新连接)的所有查询都使用这个内存限制。 - set global exec_mem_limit=1G; - // 在 SQL 中设置变量 exec_mem_limit。则该变量仅影响这个 SQL。 - select /*+ SET_VAR(exec_mem_limit=1G) */ id, name from tbl where xxx; - ``` - - 因为 Doris 的查询引擎是基于全内存的 MPP 查询框架。因此当一个查询的内存使用超过限制后,查询会被终止。因此,当一个查询无法在合理的内存限制下运行时,我们就需要通过一些 SQL 优化手段,或者集群扩容的方式来解决了。 - -2. 
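Returning for a moment to the resource groups above: the online/offline separation described there uses exactly the same two building blocks, tags plus `replication_allocation`. A sketch with assumed host names, table name and user names:

```sql
-- Tag two nodes for online serving and one for offline/ETL work (hosts are placeholders).
ALTER SYSTEM MODIFY BACKEND "host7:9050" SET ("tag.location" = "online");
ALTER SYSTEM MODIFY BACKEND "host8:9050" SET ("tag.location" = "online");
ALTER SYSTEM MODIFY BACKEND "host9:9050" SET ("tag.location" = "offline");

-- Keep 2 replicas in the online group and 1 in the offline group.
CREATE TABLE svc_table (k1 INT, k2 INT)
DISTRIBUTED BY HASH(k1) BUCKETS 1
PROPERTIES (
    "replication_allocation" = "tag.location.online:2, tag.location.offline:1"
);

-- Pin existing users to their group (online_user / etl_user are assumed to exist already).
SET PROPERTY FOR 'online_user' 'resource_tags.location' = 'online';
SET PROPERTY FOR 'etl_user' 'resource_tags.location' = 'offline';
```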
CPU 限制 - - 用户可以通过以下方式限制查询的 CPU 资源: - - ``` - // 设置会话变量 cpu_resource_limit。则之后该会话内(连接内)的所有查询都使用这个CPU限制。 - set cpu_resource_limit = 2 - // 设置用户的属性 cpu_resource_limit,则所有该用户的查询情况都使用这个CPU限制。该属性的优先级高于会话变量 cpu_resource_limit - set property for 'user1' 'cpu_resource_limit' = '3'; - ``` - - `cpu_resource_limit` 的取值是一个相对值,取值越大则能够使用的 CPU 资源越多。但一个查询能使用的CPU上限也取决于表的分区分桶数。原则上,一个查询的最大 CPU 使用量和查询涉及到的 tablet 数量正相关。极端情况下,假设一个查询仅涉及到一个 tablet,则即使 `cpu_resource_limit` 设置一个较大值,也仅能使用 1 个 CPU 资源。 - -通过内存和CPU的资源限制。我们可以在一个资源组内,将用户的查询进行更细粒度的资源划分。比如我们可以让部分时效性要求不高,但是计算量很大的离线任务使用更少的CPU资源和更多的内存资源。而部分延迟敏感的在线任务,使用更多的CPU资源以及合理的内存资源。 - -## 最佳实践和向前兼容 - -Tag 划分和 CPU 限制是 0.15 版本中的新功能。为了保证可以从老版本平滑升级,Doris 做了如下的向前兼容: - -1. 每个 BE 节点会有一个默认的 Tag:`"tag.location": "default"`。 -2. 通过 `alter system add backend` 语句新增的 BE 节点也会默认设置 Tag:`"tag.location": "default"`。 -2. 所有表的副本分布默认修改为:`"tag.location.default:xx`。其中 xx 为原副本数量。 -3. 用户依然可以通过 `"replication_num" = "xx"` 在建表语句中指定副本数,这种属性将会自动转换成:`"tag.location.default:xx`。从而保证无需修改原建表语句。 -4. 默认情况下,单查询的内存限制为单节点2GB,CPU资源无限制,和原有行为保持一致。且用户的 `resource_tags.location` 属性为空,即默认情况下,用户可以访问任意 Tag 的 BE,和原有行为保持一致。 - -这里我们给出一个从原集群升级到 0.15 版本后,开始使用资源划分功能的步骤示例: - -1. 关闭数据修复与均衡逻辑 - - 因为升级后,BE的默认Tag为 `"tag.location": "default"`,而表的默认副本分布为:`"tag.location.default:xx`。所以如果直接修改 BE 的 Tag,系统会自动检测到副本分布的变化,从而开始数据重分布。这可能会占用部分系统资源。所以我们可以在修改 Tag 前,先关闭数据修复与均衡逻辑,以保证我们在规划资源时,不会有副本重分布的操作。 - - ``` - ADMIN SET FRONTEND CONFIG ("disable_balance" = "true"); - ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "true"); - ``` - -2. 设置 Tag 和表副本分布 - - 接下来可以通过 `alter system modify backend` 语句进行 BE 的 Tag 设置。以及通过 `alter table` 语句修改表的副本分布策略。示例如下: - - ``` - alter system modify backend "host1:9050, 1212:9050" set ("tag.location" = "group_a"); - alter table my_table modify partition p1 set ("replication_allocation" = "tag.location.group_a:2"); - ``` - -3. 开启数据修复与均衡逻辑 - - 在 Tag 和副本分布都设置完毕后,我们可以开启数据修复与均衡逻辑来触发数据的重分布了。 - - ``` - ADMIN SET FRONTEND CONFIG ("disable_balance" = "false"); - ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "false"); - ``` - - 该过程根据涉及到的数据量会持续一段时间。并且会导致部分 colocation table 无法进行 colocation 规划(因为副本在迁移中)。可以通过 ` show proc "/cluster_balance/"` 来查看进度。也可以通过 `show proc "/statistic"` 中 `UnhealthyTabletNum` 的数量来判断进度。当 `UnhealthyTabletNum` 降为 0 时,则代表数据重分布完毕。 - -4. 
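The two progress checks mentioned in step 3 can be run as-is from any MySQL client connected to the FE:

```sql
-- Cluster-wide statistics; redistribution is finished once UnhealthyTabletNum drops to 0.
SHOW PROC "/statistic";

-- Detailed view of the tablet balance and repair tasks currently in flight.
SHOW PROC "/cluster_balance/";
```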
设置用户的资源标签权限。 - - 等数据重分布完毕后。我们就可以开始设置用户的资源标签权限了。因为默认情况下,用户的 `resource_tags.location` 属性为空,即可以访问任意 Tag 的 BE。所以在前面步骤中,不会影响到已有用户的正常查询。当 `resource_tags.location` 属性非空时,用户将被限制访问指定 Tag 的 BE。 - -通过以上4步,我们可以较为平滑的在原有集群升级后,使用资源划分功能。 diff --git a/docs/zh-CN/administrator-guide/operation/be-olap-error-code.md b/docs/zh-CN/administrator-guide/operation/be-olap-error-code.md deleted file mode 100644 index 7acfc77f93..0000000000 --- a/docs/zh-CN/administrator-guide/operation/be-olap-error-code.md +++ /dev/null @@ -1,265 +0,0 @@ ---- -{ - "title": "BE端OLAP函数的返回值说明", - "language": "zh-CN" -} - ---- - - - -# BE端OLAP函数的返回值说明 - - - -| 返回值名称 | 返回值 | 返回值说明 | -| ------------------------------------------------ | ------ | ------------------------------------------------------------ | -| OLAP_SUCCESS | 0 | 成功 | -| OLAP_ERR_OTHER_ERROR | -1 | 其他错误 | -| OLAP_REQUEST_FAILED | -2 | 请求失败 | -| 系统错误代码,例如文件系统内存和其他系统调用失败 | | | -| OLAP_ERR_OS_ERROR | -100 | 操作系统错误 | -| OLAP_ERR_DIR_NOT_EXIST | -101 | 目录不存在错误 | -| OLAP_ERR_FILE_NOT_EXIST | -102 | 文件不存在错误 | -| OLAP_ERR_CREATE_FILE_ERROR | -103 | 创建文件错误 | -| OLAP_ERR_MALLOC_ERROR | -104 | 内存分配错误 | -| OLAP_ERR_STL_ERROR | -105 | 标准模板库错误 | -| OLAP_ERR_IO_ERROR | -106 | IO错误 | -| OLAP_ERR_MUTEX_ERROR | -107 | 互斥锁错误 | -| OLAP_ERR_PTHREAD_ERROR | -108 | POSIX thread错误 | -| OLAP_ERR_NETWORK_ERROR | -109 | 网络异常错误 | -| OLAP_ERR_UB_FUNC_ERROR | -110 | | -| OLAP_ERR_COMPRESS_ERROR | -111 | 数据压缩错误 | -| OLAP_ERR_DECOMPRESS_ERROR | -112 | 数据解压缩错误 | -| OLAP_ERR_UNKNOWN_COMPRESSION_TYPE | -113 | 未知的数据压缩类型 | -| OLAP_ERR_MMAP_ERROR | -114 | 内存映射文件错误 | -| OLAP_ERR_RWLOCK_ERROR | -115 | 读写锁错误 | -| OLAP_ERR_READ_UNENOUGH | -116 | 读取内存不够异常 | -| OLAP_ERR_CANNOT_CREATE_DIR | -117 | 不能创建目录异常 | -| OLAP_ERR_UB_NETWORK_ERROR | -118 | 网络异常 | -| OLAP_ERR_FILE_FORMAT_ERROR | -119 | 文件格式异常 | -| OLAP_ERR_EVAL_CONJUNCTS_ERROR | -120 | | -| OLAP_ERR_COPY_FILE_ERROR | -121 | 拷贝文件错误 | -| OLAP_ERR_FILE_ALREADY_EXIST | -122 | 文件已经存在错误 | -| 通用错误代码 | | | -| OLAP_ERR_NOT_INITED | -200 | 不能初始化异常 | -| OLAP_ERR_FUNC_NOT_IMPLEMENTED | -201 | 函数不能执行异常 | -| OLAP_ERR_CALL_SEQUENCE_ERROR | -202 | 调用SEQUENCE异常 | -| OLAP_ERR_INPUT_PARAMETER_ERROR | -203 | 输入参数错误 | -| OLAP_ERR_BUFFER_OVERFLOW | -204 | 内存缓冲区溢出错误 | -| OLAP_ERR_CONFIG_ERROR | -205 | 配置错误 | -| OLAP_ERR_INIT_FAILED | -206 | 初始化失败 | -| OLAP_ERR_INVALID_SCHEMA | -207 | 无效的Schema | -| OLAP_ERR_CHECKSUM_ERROR | -208 | 检验值错误 | -| OLAP_ERR_SIGNATURE_ERROR | -209 | 签名错误 | -| OLAP_ERR_CATCH_EXCEPTION | -210 | 捕捉到异常 | -| OLAP_ERR_PARSE_PROTOBUF_ERROR | -211 | 解析Protobuf出错 | -| OLAP_ERR_SERIALIZE_PROTOBUF_ERROR | -212 | Protobuf序列化错误 | -| OLAP_ERR_WRITE_PROTOBUF_ERROR | -213 | Protobuf写错误 | -| OLAP_ERR_VERSION_NOT_EXIST | -214 | tablet版本不存在错误 | -| OLAP_ERR_TABLE_NOT_FOUND | -215 | 未找到tablet错误 | -| OLAP_ERR_TRY_LOCK_FAILED | -216 | 尝试锁失败 | -| OLAP_ERR_OUT_OF_BOUND | -218 | 内存越界 | -| OLAP_ERR_UNDERFLOW | -219 | underflow错误 | -| OLAP_ERR_FILE_DATA_ERROR | -220 | 文件数据错误 | -| OLAP_ERR_TEST_FILE_ERROR | -221 | 测试文件错误 | -| OLAP_ERR_INVALID_ROOT_PATH | -222 | 无效的根目录 | -| OLAP_ERR_NO_AVAILABLE_ROOT_PATH | -223 | 没有有效的根目录 | -| OLAP_ERR_CHECK_LINES_ERROR | -224 | 检查行数错误 | -| OLAP_ERR_INVALID_CLUSTER_INFO | -225 | 无效的Cluster信息 | -| OLAP_ERR_TRANSACTION_NOT_EXIST | -226 | 事务不存在 | -| OLAP_ERR_DISK_FAILURE | -227 | 磁盘错误 | -| OLAP_ERR_TRANSACTION_ALREADY_COMMITTED | -228 | 交易已提交 | -| OLAP_ERR_TRANSACTION_ALREADY_VISIBLE | -229 | 事务可见 | -| OLAP_ERR_VERSION_ALREADY_MERGED | -230 | 版本已合并 | -| OLAP_ERR_LZO_DISABLED | -231 | LZO已禁用 | -| OLAP_ERR_DISK_REACH_CAPACITY_LIMIT | -232 | 磁盘到达容量限制 | -| 
OLAP_ERR_TOO_MANY_TRANSACTIONS | -233 | 太多事务积压未完成 | -| OLAP_ERR_INVALID_SNAPSHOT_VERSION | -234 | 无效的快照版本 | -| OLAP_ERR_TOO_MANY_VERSION | -235 | tablet的数据版本超过了最大限制(默认500) | -| OLAP_ERR_NOT_INITIALIZED | -236 | 不能初始化 | -| OLAP_ERR_ALREADY_CANCELLED | -237 | 已经被取消 | -| OLAP_ERR_TOO_MANY_SEGMENTS | -238 | 通常出现在同一批导入数据量过大的情况,从而导致某一个 tablet 的 Segment 文件过多 | -| 命令执行异常代码 | | | -| OLAP_ERR_CE_CMD_PARAMS_ERROR | -300 | 命令参数错误 | -| OLAP_ERR_CE_BUFFER_TOO_SMALL | -301 | 缓冲区太多小文件 | -| OLAP_ERR_CE_CMD_NOT_VALID | -302 | 无效的命令 | -| OLAP_ERR_CE_LOAD_TABLE_ERROR | -303 | 加载数据表错误 | -| OLAP_ERR_CE_NOT_FINISHED | -304 | 命令没有执行成功 | -| OLAP_ERR_CE_TABLET_ID_EXIST | -305 | tablet Id不存在错误 | -| OLAP_ERR_CE_TRY_CE_LOCK_ERROR | -306 | 尝试获取执行命令锁错误 | -| Tablet错误异常代码 | | | -| OLAP_ERR_TABLE_VERSION_DUPLICATE_ERROR | -400 | tablet副本版本错误 | -| OLAP_ERR_TABLE_VERSION_INDEX_MISMATCH_ERROR | -401 | teblet版本索引不匹配异常 | -| OLAP_ERR_TABLE_INDEX_VALIDATE_ERROR | -402 | 这里不检查tablet的初始版本,因为如果在一个tablet进行schema-change时重新启动 BE,我们可能会遇到空tablet异常 | -| OLAP_ERR_TABLE_INDEX_FIND_ERROR | -403 | 无法获得第一个Block块位置 或者找到最后一行Block块失败会引发此异常 | -| OLAP_ERR_TABLE_CREATE_FROM_HEADER_ERROR | -404 | 无法加载Tablet的时候会触发此异常 | -| OLAP_ERR_TABLE_CREATE_META_ERROR | -405 | 无法创建Tablet(更改schema),Base tablet不存在 ,会触发此异常 | -| OLAP_ERR_TABLE_ALREADY_DELETED_ERROR | -406 | tablet已经被删除 | -| 存储引擎错误代码 | | | -| OLAP_ERR_ENGINE_INSERT_EXISTS_TABLE | -500 | 添加相同的tablet两次,添加tablet到相同数据目录两次,新tablet为空,旧tablet存在。会触发此异常 | -| OLAP_ERR_ENGINE_DROP_NOEXISTS_TABLE | -501 | 删除不存在的表 | -| OLAP_ERR_ENGINE_LOAD_INDEX_TABLE_ERROR | -502 | 加载tablet_meta失败,cumulative rowset无效的segment group meta,会引发此异常 | -| OLAP_ERR_TABLE_INSERT_DUPLICATION_ERROR | -503 | 表插入重复 | -| OLAP_ERR_DELETE_VERSION_ERROR | -504 | 删除版本错误 | -| OLAP_ERR_GC_SCAN_PATH_ERROR | -505 | GC扫描路径错误 | -| OLAP_ERR_ENGINE_INSERT_OLD_TABLET | -506 | 当 BE 正在重新启动并且较旧的tablet已添加到垃圾收集队列但尚未删除时,在这种情况下,由于 data_dirs 是并行加载的,稍后加载的tablet可能比以前加载的tablet旧,这不应被确认为失败,所以此时返回改代码 | -| Fetch Handler错误代码 | | | -| OLAP_ERR_FETCH_OTHER_ERROR | -600 | FetchHandler其他错误 | -| OLAP_ERR_FETCH_TABLE_NOT_EXIST | -601 | FetchHandler表不存在 | -| OLAP_ERR_FETCH_VERSION_ERROR | -602 | FetchHandler版本错误 | -| OLAP_ERR_FETCH_SCHEMA_ERROR | -603 | FetchHandler Schema错误 | -| OLAP_ERR_FETCH_COMPRESSION_ERROR | -604 | FetchHandler压缩错误 | -| OLAP_ERR_FETCH_CONTEXT_NOT_EXIST | -605 | FetchHandler上下文不存在 | -| OLAP_ERR_FETCH_GET_READER_PARAMS_ERR | -606 | FetchHandler GET读参数错误 | -| OLAP_ERR_FETCH_SAVE_SESSION_ERR | -607 | FetchHandler保存会话错误 | -| OLAP_ERR_FETCH_MEMORY_EXCEEDED | -608 | FetchHandler内存超出异常 | -| 读异常错误代码 | | | -| OLAP_ERR_READER_IS_UNINITIALIZED | -700 | 读不能初始化 | -| OLAP_ERR_READER_GET_ITERATOR_ERROR | -701 | 获取读迭代器错误 | -| OLAP_ERR_CAPTURE_ROWSET_READER_ERROR | -702 | 当前Rowset读错误 | -| OLAP_ERR_READER_READING_ERROR | -703 | 初始化列数据失败,cumulative rowset 的列数据无效 ,会返回该异常代码 | -| OLAP_ERR_READER_INITIALIZE_ERROR | -704 | 读初始化失败 | -| BaseCompaction异常代码信息 | | | -| OLAP_ERR_BE_VERSION_NOT_MATCH | -800 | BE Compaction 版本不匹配错误 | -| OLAP_ERR_BE_REPLACE_VERSIONS_ERROR | -801 | BE Compaction 替换版本错误 | -| OLAP_ERR_BE_MERGE_ERROR | -802 | BE Compaction合并错误 | -| OLAP_ERR_CAPTURE_ROWSET_ERROR | -804 | 找不到Rowset对应的版本 | -| OLAP_ERR_BE_SAVE_HEADER_ERROR | -805 | BE Compaction保存Header错误 | -| OLAP_ERR_BE_INIT_OLAP_DATA | -806 | BE Compaction 初始化OLAP数据错误 | -| OLAP_ERR_BE_TRY_OBTAIN_VERSION_LOCKS | -807 | BE Compaction 尝试获得版本锁错误 | -| OLAP_ERR_BE_NO_SUITABLE_VERSION | -808 | BE Compaction 没有合适的版本 | -| OLAP_ERR_BE_TRY_BE_LOCK_ERROR | -809 | 其他base compaction正在运行,尝试获取锁失败 | -| 
OLAP_ERR_BE_INVALID_NEED_MERGED_VERSIONS | -810 | 无效的Merge版本 | -| OLAP_ERR_BE_ERROR_DELETE_ACTION | -811 | BE执行删除操作错误 | -| OLAP_ERR_BE_SEGMENTS_OVERLAPPING | -812 | cumulative point有重叠的Rowset异常 | -| OLAP_ERR_BE_CLONE_OCCURRED | -813 | 将压缩任务提交到线程池后可能会发生克隆任务,并且选择用于压缩的行集可能会发生变化。 在这种情况下,不应执行当前的压缩任务。 返回该代码 | -| PUSH异常代码 | | | -| OLAP_ERR_PUSH_INIT_ERROR | -900 | 无法初始化读取器,无法创建表描述符,无法初始化内存跟踪器,不支持的文件格式类型,无法打开扫描仪,无法获取元组描述符,为元组分配内存失败,都会返回该代码 | -| OLAP_ERR_PUSH_DELTA_FILE_EOF | -901 | | -| OLAP_ERR_PUSH_VERSION_INCORRECT | -902 | PUSH版本不正确 | -| OLAP_ERR_PUSH_SCHEMA_MISMATCH | -903 | PUSH Schema不匹配 | -| OLAP_ERR_PUSH_CHECKSUM_ERROR | -904 | PUSH校验值错误 | -| OLAP_ERR_PUSH_ACQUIRE_DATASOURCE_ERROR | -905 | PUSH 获取数据源错误 | -| OLAP_ERR_PUSH_CREAT_CUMULATIVE_ERROR | -906 | PUSH 创建CUMULATIVE错误代码 | -| OLAP_ERR_PUSH_BUILD_DELTA_ERROR | -907 | 推送的增量文件有错误的校验码 | -| OLAP_ERR_PUSH_VERSION_ALREADY_EXIST | -908 | PUSH的版本已经存在 | -| OLAP_ERR_PUSH_TABLE_NOT_EXIST | -909 | PUSH的表不存在 | -| OLAP_ERR_PUSH_INPUT_DATA_ERROR | -910 | PUSH的数据无效,可能是长度,数据类型等问题 | -| OLAP_ERR_PUSH_TRANSACTION_ALREADY_EXIST | -911 | 将事务提交给引擎时,发现Rowset存在,但Rowset ID 不一样 | -| OLAP_ERR_PUSH_BATCH_PROCESS_REMOVED | -912 | 删除了推送批处理过程 | -| OLAP_ERR_PUSH_COMMIT_ROWSET | -913 | PUSH Commit Rowset | -| OLAP_ERR_PUSH_ROWSET_NOT_FOUND | -914 | PUSH Rowset没有发现 | -| SegmentGroup异常代码 | | | -| OLAP_ERR_INDEX_LOAD_ERROR | -1000 | 加载索引错误 | -| OLAP_ERR_INDEX_EOF | -1001 | | -| OLAP_ERR_INDEX_CHECKSUM_ERROR | -1002 | 校验码验证错误,加载索引对应的Segment 错误。 | -| OLAP_ERR_INDEX_DELTA_PRUNING | -1003 | 索引增量修剪 | -| OLAPData异常代码信息 | | | -| OLAP_ERR_DATA_ROW_BLOCK_ERROR | -1100 | 数据行Block块错误 | -| OLAP_ERR_DATA_FILE_TYPE_ERROR | -1101 | 数据文件类型错误 | -| OLAP_ERR_DATA_EOF | -1102 | | -| OLAP数据写错误代码 | | | -| OLAP_ERR_WRITER_INDEX_WRITE_ERROR | -1200 | 索引写错误 | -| OLAP_ERR_WRITER_DATA_WRITE_ERROR | -1201 | 数据写错误 | -| OLAP_ERR_WRITER_ROW_BLOCK_ERROR | -1202 | Row Block块写错误 | -| OLAP_ERR_WRITER_SEGMENT_NOT_FINALIZED | -1203 | 在添加新Segment之前,上一Segment未完成 | -| RowBlock错误代码 | | | -| OLAP_ERR_ROWBLOCK_DECOMPRESS_ERROR | -1300 | Rowblock解压缩错误 | -| OLAP_ERR_ROWBLOCK_FIND_ROW_EXCEPTION | -1301 | 获取Block Entry失败 | -| OLAP_ERR_ROWBLOCK_READ_INFO_ERROR | -1302 | 读取Rowblock信息错误 | -| Tablet元数据错误 | | | -| OLAP_ERR_HEADER_ADD_VERSION | -1400 | tablet元数据增加版本 | -| OLAP_ERR_HEADER_DELETE_VERSION | -1401 | tablet元数据删除版本 | -| OLAP_ERR_HEADER_ADD_PENDING_DELTA | -1402 | tablet元数据添加待处理增量 | -| OLAP_ERR_HEADER_ADD_INCREMENTAL_VERSION | -1403 | tablet元数据添加自增版本 | -| OLAP_ERR_HEADER_INVALID_FLAG | -1404 | tablet元数据无效的标记 | -| OLAP_ERR_HEADER_PUT | -1405 | tablet元数据PUT操作 | -| OLAP_ERR_HEADER_DELETE | -1406 | tablet元数据DELETE操作 | -| OLAP_ERR_HEADER_GET | -1407 | tablet元数据GET操作 | -| OLAP_ERR_HEADER_LOAD_INVALID_KEY | -1408 | tablet元数据加载无效Key | -| OLAP_ERR_HEADER_FLAG_PUT | -1409 | | -| OLAP_ERR_HEADER_LOAD_JSON_HEADER | -1410 | tablet元数据加载JSON Header | -| OLAP_ERR_HEADER_INIT_FAILED | -1411 | tablet元数据Header初始化失败 | -| OLAP_ERR_HEADER_PB_PARSE_FAILED | -1412 | tablet元数据 Protobuf解析失败 | -| OLAP_ERR_HEADER_HAS_PENDING_DATA | -1413 | tablet元数据有待处理的数据 | -| TabletSchema异常代码信息 | | | -| OLAP_ERR_SCHEMA_SCHEMA_INVALID | -1500 | Tablet Schema无效 | -| OLAP_ERR_SCHEMA_SCHEMA_FIELD_INVALID | -1501 | Tablet Schema 字段无效 | -| SchemaHandler异常代码信息 | | | -| OLAP_ERR_ALTER_MULTI_TABLE_ERR | -1600 | ALTER 多表错误 | -| OLAP_ERR_ALTER_DELTA_DOES_NOT_EXISTS | -1601 | 获取所有数据源失败,Tablet无版本 | -| OLAP_ERR_ALTER_STATUS_ERR | -1602 | 检查行号失败,内部排序失败,行块排序失败,这些都会返回该代码 | -| OLAP_ERR_PREVIOUS_SCHEMA_CHANGE_NOT_FINISHED | -1603 | 先前的Schema更改未完成 | -| 
OLAP_ERR_SCHEMA_CHANGE_INFO_INVALID | -1604 | Schema变更信息无效 | -| OLAP_ERR_QUERY_SPLIT_KEY_ERR | -1605 | 查询 Split key 错误 | -| OLAP_ERR_DATA_QUALITY_ERR | -1606 | 模式更改/物化视图期间数据质量问题导致的错误 | -| Column File错误代码 | | | -| OLAP_ERR_COLUMN_DATA_LOAD_BLOCK | -1700 | 加载列数据块错误 | -| OLAP_ERR_COLUMN_DATA_RECORD_INDEX | -1701 | 加载数据记录索引错误 | -| OLAP_ERR_COLUMN_DATA_MAKE_FILE_HEADER | -1702 | | -| OLAP_ERR_COLUMN_DATA_READ_VAR_INT | -1703 | 无法从Stream中读取列数据 | -| OLAP_ERR_COLUMN_DATA_PATCH_LIST_NUM | -1704 | | -| OLAP_ERR_COLUMN_STREAM_EOF | -1705 | 如果数据流结束,返回该代码 | -| OLAP_ERR_COLUMN_READ_STREAM | -1706 | 块大小大于缓冲区大小,压缩剩余大小小于Stream头大小,读取流失败 这些情况下会抛出该异常 | -| OLAP_ERR_COLUMN_STREAM_NOT_EXIST | -1707 | Stream为空,不存在,未找到数据流 等情况下返回该异常代码 | -| OLAP_ERR_COLUMN_VALUE_NULL | -1708 | 列值为空异常 | -| OLAP_ERR_COLUMN_SEEK_ERROR | -1709 | 如果通过schema变更添加列,由于schema变更可能导致列索引存在,返回这个异常代码 | -| DeleteHandler错误代码 | | | -| OLAP_ERR_DELETE_INVALID_CONDITION | -1900 | 删除条件无效 | -| OLAP_ERR_DELETE_UPDATE_HEADER_FAILED | -1901 | 删除更新Header错误 | -| OLAP_ERR_DELETE_SAVE_HEADER_FAILED | -1902 | 删除保存header错误 | -| OLAP_ERR_DELETE_INVALID_PARAMETERS | -1903 | 删除参数无效 | -| OLAP_ERR_DELETE_INVALID_VERSION | -1904 | 删除版本无效 | -| Cumulative Handler错误代码 | | | -| OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS | -2000 | Cumulative没有合适的版本 | -| OLAP_ERR_CUMULATIVE_REPEAT_INIT | -2001 | Cumulative Repeat 初始化错误 | -| OLAP_ERR_CUMULATIVE_INVALID_PARAMETERS | -2002 | Cumulative参数无效 | -| OLAP_ERR_CUMULATIVE_FAILED_ACQUIRE_DATA_SOURCE | -2003 | Cumulative获取数据源失败 | -| OLAP_ERR_CUMULATIVE_INVALID_NEED_MERGED_VERSIONS | -2004 | Cumulative无有效需要合并版本 | -| OLAP_ERR_CUMULATIVE_ERROR_DELETE_ACTION | -2005 | Cumulative删除操作错误 | -| OLAP_ERR_CUMULATIVE_MISS_VERSION | -2006 | rowsets缺少版本 | -| OLAP_ERR_CUMULATIVE_CLONE_OCCURRED | -2007 | 将压缩任务提交到线程池后可能会发生克隆任务,并且选择用于压缩的行集可能会发生变化。 在这种情况下,不应执行当前的压缩任务。否则会触发改异常 | -| OLAPMeta异常代码 | | | -| OLAP_ERR_META_INVALID_ARGUMENT | -3000 | 元数据参数无效 | -| OLAP_ERR_META_OPEN_DB | -3001 | 打开DB元数据错误 | -| OLAP_ERR_META_KEY_NOT_FOUND | -3002 | 元数据key没发现 | -| OLAP_ERR_META_GET | -3003 | GET元数据错误 | -| OLAP_ERR_META_PUT | -3004 | PUT元数据错误 | -| OLAP_ERR_META_ITERATOR | -3005 | 元数据迭代器错误 | -| OLAP_ERR_META_DELETE | -3006 | 删除元数据错误 | -| OLAP_ERR_META_ALREADY_EXIST | -3007 | 元数据已经存在错误 | -| Rowset错误代码 | | | -| OLAP_ERR_ROWSET_WRITER_INIT | -3100 | Rowset写初始化错误 | -| OLAP_ERR_ROWSET_SAVE_FAILED | -3101 | Rowset保存失败 | -| OLAP_ERR_ROWSET_GENERATE_ID_FAILED | -3102 | Rowset生成ID失败 | -| OLAP_ERR_ROWSET_DELETE_FILE_FAILED | -3103 | Rowset删除文件失败 | -| OLAP_ERR_ROWSET_BUILDER_INIT | -3104 | Rowset初始化构建失败 | -| OLAP_ERR_ROWSET_TYPE_NOT_FOUND | -3105 | Rowset类型没有发现 | -| OLAP_ERR_ROWSET_ALREADY_EXIST | -3106 | Rowset已经存在 | -| OLAP_ERR_ROWSET_CREATE_READER | -3107 | Rowset创建读对象失败 | -| OLAP_ERR_ROWSET_INVALID | -3108 | Rowset无效 | -| OLAP_ERR_ROWSET_LOAD_FAILED | -3109 | Rowset加载失败 | -| OLAP_ERR_ROWSET_READER_INIT | -3110 | Rowset读对象初始化失败 | -| OLAP_ERR_ROWSET_READ_FAILED | -3111 | Rowset读失败 | -| OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION | -3112 | Rowset无效的事务状态 | - - - diff --git a/docs/zh-CN/administrator-guide/operation/disk-capacity.md b/docs/zh-CN/administrator-guide/operation/disk-capacity.md deleted file mode 100644 index 8616f63575..0000000000 --- a/docs/zh-CN/administrator-guide/operation/disk-capacity.md +++ /dev/null @@ -1,163 +0,0 @@ ---- -{ - "title": "磁盘空间管理", - "language": "zh-CN" -} ---- - - - -# 磁盘空间管理 - -本文档主要介绍和磁盘存储空间有关的系统参数和处理策略。 - -Doris 的数据磁盘空间如果不加以控制,会因磁盘写满而导致进程挂掉。因此我们监测磁盘的使用率和剩余空间,通过设置不同的警戒水位,来控制 Doris 系统中的各项操作,尽量避免发生磁盘被写满的情况。 - -## 名词解释 - -* FE:Frontend,Doris 
的前端节点。负责元数据管理和请求接入。 -* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。 -* Data Dir:数据目录,在 BE 配置文件 `be.conf` 的 `storage_root_path` 中指定的各个数据目录。通常一个数据目录对应一个磁盘、因此下文中 **磁盘** 也指代一个数据目录。 - -## 基本原理 - -BE 定期(每隔一分钟)会向 FE 汇报一次磁盘使用情况。FE 记录这些统计值,并根据这些统计值,限制不同的操作请求。 - -在 FE 中分别设置了 **高水位(High Watermark)** 和 **危险水位(Flood Stage)** 两级阈值。危险水位高于高水位。当磁盘使用率高于高水位时,Doris 会限制某些操作的执行(如副本均衡等)。而如果高于危险水位,则会禁止某些操作的执行(如导入)。 - -同时,在 BE 上也设置了 **危险水位(Flood Stage)**。考虑到 FE 并不能完全及时的检测到 BE 上的磁盘使用情况,以及无法控制某些 BE 自身运行的操作(如 Compaction)。因此 BE 上的危险水位用于 BE 主动拒绝和停止某些操作,达到自我保护的目的。 - -## FE 参数 - -**高水位:** - -``` -storage_high_watermark_usage_percent 默认 85 (85%)。 -storage_min_left_capacity_bytes 默认 2GB。 -``` - -当磁盘空间使用率**大于** `storage_high_watermark_usage_percent`,**或者** 磁盘空间剩余大小**小于** `storage_min_left_capacity_bytes` 时,该磁盘不会再被作为以下操作的目的路径: - -* Tablet 均衡操作(Balance) -* Colocation 表数据分片的重分布(Relocation) -* Decommission - -**危险水位:** - -``` -storage_flood_stage_usage_percent 默认 95 (95%)。 -storage_flood_stage_left_capacity_bytes 默认 1GB。 -``` - -当磁盘空间使用率**大于** `storage_flood_stage_usage_percent`,**并且** 磁盘空间剩余大小**小于** `storage_flood_stage_left_capacity_bytes` 时,该磁盘不会再被作为以下操作的目的路径,并禁止某些操作: - -* Tablet 均衡操作(Balance) -* Colocation 表数据分片的重分布(Relocation) -* 副本补齐 -* 恢复操作(Restore) -* 数据导入(Load/Insert) - -## BE 参数 - -**危险水位:** - -``` -capacity_used_percent_flood_stage 默认 95 (95%)。 -capacity_min_left_bytes_flood_stage 默认 1GB。 -``` - -当磁盘空间使用率**大于** `storage_flood_stage_usage_percent`,**并且** 磁盘空间剩余大小**小于** `storage_flood_stage_left_capacity_bytes` 时,该磁盘上的以下操作会被禁止: - -* Base/Cumulative Compaction。 -* 数据写入。包括各种导入操作。 -* Clone Task。通常发生于副本修复或均衡时。 -* Push Task。发生在 Hadoop 导入的 Loading 阶段,下载文件。 -* Alter Task。Schema Change 或 Rollup 任务。 -* Download Task。恢复操作的 Downloading 阶段。 - -## 磁盘空间释放 - -当磁盘空间高于高水位甚至危险水位后,很多操作都会被禁止。此时可以尝试通过以下方式减少磁盘使用率,恢复系统。 - -* 删除表或分区 - - 通过删除表或分区的方式,能够快速降低磁盘空间使用率,恢复集群。**注意:只有 `DROP` 操作可以达到快速降低磁盘空间使用率的目的,`DELETE` 操作不可以。** - - ``` - DROP TABLE tbl; - ALTER TABLE tbl DROP PARTITION p1; - ``` - -* 扩容 BE - - 扩容后,数据分片会自动均衡到磁盘使用率较低的 BE 节点上。扩容操作会根据数据量及节点数量不同,在数小时或数天后使集群到达均衡状态。 - -* 修改表或分区的副本 - - 可以将表或分区的副本数降低。比如默认3副本可以降低为2副本。该方法虽然降低了数据的可靠性,但是能够快速的降低磁盘使用率,使集群恢复正常。该方法通常用于紧急恢复系统。请在恢复后,通过扩容或删除数据等方式,降低磁盘使用率后,将副本数恢复为 3。 - - 修改副本操作为瞬间生效,后台会自动异步的删除多余的副本。 - - ``` - ALTER TABLE tbl MODIFY PARTITION p1 SET("replication_num" = "2"); - ``` - -* 删除多余文件 - - 当 BE 进程已经因为磁盘写满而挂掉并无法启动时(此现象可能因 FE 或 BE 检测不及时而发生)。需要通过删除数据目录下的一些临时文件,保证 BE 进程能够启动。以下目录中的文件可以直接删除: - - * log/:日志目录下的日志文件。 - * snapshot/: 快照目录下的快照文件。 - * trash/:回收站中的文件。 - - **这种操作会对 [从 BE 回收站中恢复数据](./tablet-restore-tool.md) 产生影响。** - - 如果BE还能够启动,则可以使用`ADMIN CLEAN TRASH ON(BackendHost:BackendHeartBeatPort);`来主动清理临时文件,会清理 **所有** trash文件和过期snapshot文件,**这将影响从回收站恢复数据的操作** 。 - - 如果不手动执行`ADMIN CLEAN TRASH`,系统仍将会在几分钟至几十分钟内自动执行清理,这里分为两种情况: - * 如果磁盘占用未达到 **危险水位(Flood Stage)** 的90%,则会清理过期trash文件和过期snapshot文件,此时会保留一些近期文件而不影响恢复数据。 - * 如果磁盘占用已达到 **危险水位(Flood Stage)** 的90%,则会清理 **所有** trash文件和过期snapshot文件, **此时会影响从回收站恢复数据的操作** 。 - 自动执行的时间间隔可以通过配置项中的`max_garbage_sweep_interval`和`max_garbage_sweep_interval`更改。 - - 出现由于缺少trash文件而导致恢复失败的情况时,可能返回如下结果: - - ``` - {"status": "Fail","msg": "can find tablet path in trash"} - ``` - -* 删除数据文件(危险!!!) 
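The space-recovery statements above, pulled together into one emergency checklist. Table, partition and host names are placeholders and the statements are alternatives, not a fixed sequence; remember that only `DROP` frees space quickly (`DELETE` does not), and that lowering the replica count trades durability for space and should be reverted to 3 once the cluster has recovered.

```sql
-- 1. Drop whole tables or individual partitions you can afford to lose (pick whichever applies).
DROP TABLE tbl1;
ALTER TABLE tbl2 DROP PARTITION p1;

-- 2. Temporarily reduce the replica count of a hot partition (restore it later).
ALTER TABLE tbl2 MODIFY PARTITION p1 SET ("replication_num" = "2");

-- 3. Proactively clear trash and expired snapshots on a specific BE; note that this
--    removes the ability to recover data from the recycle bin on that node.
ADMIN CLEAN TRASH ON ("BackendHost:BackendHeartBeatPort");
```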
- - 当以上操作都无法释放空间时,需要通过删除数据文件来释放空间。数据文件在指定数据目录的 `data/` 目录下。删除数据分片(Tablet)必须先确保该 Tablet 至少有一个副本是正常的,否则**删除唯一副本会导致数据丢失**。假设我们要删除 id 为 12345 的 Tablet: - - * 找到 Tablet 对应的目录,通常位于 `data/shard_id/tablet_id/` 下。如: - - ```data/0/12345/``` - - * 记录 tablet id 和 schema hash。其中 schema hash 为上一步目录的下一级目录名。如下为 352781111: - - ```data/0/12345/352781111``` - - * 删除数据目录: - - ```rm -rf data/0/12345/``` - - * 删除 Tablet 元数据(具体参考 [Tablet 元数据管理工具](./tablet-meta-tool.md)) - - ```./lib/meta_tool --operation=delete_header --root_path=/path/to/root_path --tablet_id=12345 --schema_hash= 352781111``` \ No newline at end of file diff --git a/docs/zh-CN/administrator-guide/operation/doris-error-code.md b/docs/zh-CN/administrator-guide/operation/doris-error-code.md deleted file mode 100644 index 122c0d1b59..0000000000 --- a/docs/zh-CN/administrator-guide/operation/doris-error-code.md +++ /dev/null @@ -1,179 +0,0 @@ ---- -{ - "title": "Doris错误代码表", - "language": "zh-CN" -} - ---- - - - -# Doris错误代码表 - -| 错误码 | 错误信息 | -| :----- | :----------------------------------------------------------- | -| 1005 | 创建表格失败,在返回错误信息中给出具体原因 | -| 1007 | 数据库已经存在,不能创建同名的数据库 | -| 1008 | 数据库不存在,无法删除 | -| 1044 | 数据库对用户未授权,不能访问 | -| 1045 | 用户名及密码不匹配,不能访问系统 | -| 1046 | 没有指定要查询的目标数据库 | -| 1047 | 用户输入了无效的操作指令 | -| 1049 | 用户指定了无效的数据库 | -| 1050 | 数据表已经存在 | -| 1051 | 无效的数据表 | -| 1052 | 指定的列名有歧义,不能唯一确定对应列 | -| 1053 | 为Semi-Join/Anti-Join查询指定了非法的数据列 | -| 1054 | 指定的列在表中不存在 | -| 1058 | 查询语句中选择的列数目与查询结果的列数目不一致 | -| 1060 | 列名重复 | -| 1064 | 没有存活的Backend节点 | -| 1066 | 查询语句中出现了重复的表别名 | -| 1094 | 线程ID无效 | -| 1095 | 非线程的拥有者不能终止线程的运行 | -| 1096 | 查询语句没有指定要查询或操作的数据表 | -| 1102 | 数据库名不正确 | -| 1104 | 数据表名不正确 | -| 1105 | 其它错误 | -| 1110 | 子查询中指定了重复的列 | -| 1111 | 在Where从句中非法使用聚合函数 | -| 1113 | 新建表的列集合不能为空 | -| 1115 | 使用了不支持的字符集 | -| 1130 | 客户端使用了未被授权的IP地址来访问系统 | -| 1132 | 无权限修改用户密码 | -| 1141 | 撤销用户权限时指定了用户不具备的权限 | -| 1142 | 用户执行了未被授权的操作 | -| 1166 | 列名不正确 | -| 1193 | 使用了无效的系统变量名 | -| 1203 | 用户使用的活跃连接数超过了限制 | -| 1211 | 不允许创建新用户 | -| 1227 | 拒绝访问,用户执行了无权限的操作 | -| 1228 | 会话变量不能通过SET GLOBAL指令来修改 | -| 1229 | 全局变量应通过SET GLOBAL指令来修改 | -| 1230 | 相关的系统变量没有缺省值 | -| 1231 | 给某系统变量设置了无效值 | -| 1232 | 给某系统变量设置了错误数据类型的值 | -| 1248 | 没有给内联视图设置别名 | -| 1251 | 客户端不支持服务器请求的身份验证协议;请升级MySQL客户端 | -| 1286 | 配置的存储引擎不正确 | -| 1298 | 配置的时区不正确 | -| 1347 | 对象与期望的类型不匹配 | -| 1353 | SELECT和视图的字段列表具有不同的列数 | -| 1364 | 字段不允许NULL值,但是没有设置缺省值 | -| 1372 | 密码长度不够 | -| 1396 | 用户执行的操作运行失败 | -| 1471 | 指定表不允许插入数据 | -| 1507 | 删除不存在的分区,且没有指定如果存在才删除的条件 | -| 1508 | 无法删除所有分区,请改用DROP TABLE | -| 1517 | 出现了重复的分区名字 | -| 1567 | 分区的名字不正确 | -| 1621 | 指定的系统变量是只读的 | -| 1735 | 表中不存在指定的分区名 | -| 1748 | 不能将数据插入具有空分区的表中。使用“ SHOW PARTITIONS FROM tbl”来查看此表的当前分区 | -| 1749 | 表分区不存在 | -| 5000 | 指定的表不是OLAP表 | -| 5001 | 指定的PROC路径无效 | -| 5002 | 必须在列置换中明确指定列名 | -| 5003 | Key列应排在Value列之前 | -| 5004 | 表至少应包含1个Key列 | -| 5005 | 集群ID无效 | -| 5006 | 无效的查询规划 | -| 5007 | 冲突的查询规划 | -| 5008 | 数据插入提示:仅适用于有分区的数据表 | -| 5009 | PARTITION子句对于INSERT到未分区表中无效 | -| 5010 | 列数不等于SELECT语句的选择列表数 | -| 5011 | 无法解析表引用 | -| 5012 | 指定的值不是一个有效数字 | -| 5013 | 不支持的时间单位 | -| 5014 | 表状态不正常 | -| 5015 | 分区状态不正常 | -| 5016 | 分区上存在数据导入任务 | -| 5017 | 指定列不是Key列 | -| 5018 | 值的格式无效 | -| 5019 | 数据副本与版本不匹配 | -| 5021 | BE节点已离线 | -| 5022 | 非分区表中的分区数不是1 | -| 5023 | alter语句中无任何操作 | -| 5024 | 任务执行超时 | -| 5025 | 数据插入操作失败 | -| 5026 | 通过SELECT语句创建表时使用了不支持的数据类型 | -| 5027 | 没有设置指定的参数 | -| 5028 | 没有找到指定的集群 | -| 5030 | 某用户没有访问集群的权限 | -| 5031 | 没有指定参数或参数无效 | -| 5032 | 没有指定集群实例数目 | -| 5034 | 集群名已经存在 | -| 5035 | 集群已经存在 | -| 5036 | 集群中BE节点不足 | -| 5037 | 删除集群之前,必须删除集群中的所有数据库 | -| 5037 | 集群中不存在这个ID的BE节点 | -| 5038 | 
没有指定集群名字 | -| 5040 | 未知的集群 | -| 5041 | 没有集群名字 | -| 5042 | 没有权限 | -| 5043 | 实例数目应大于0 | -| 5046 | 源集群不存在 | -| 5047 | 目标集群不存在 | -| 5048 | 源数据库不存在 | -| 5049 | 目标数据库不存在 | -| 5050 | 没有选择集群,请输入集群 | -| 5051 | 应先将源数据库连接到目标数据库 | -| 5052 | 集群内部错误:BE节点错误信息 | -| 5053 | 没有从源数据库到目标数据库的迁移任务 | -| 5054 | 指定数据库已经连接到目标数据库,或正在迁移数据 | -| 5055 | 数据连接或者数据迁移不能在同一集群内执行 | -| 5056 | 不能删除数据库:它被关联至其它数据库或正在迁移数据 | -| 5056 | 不能重命名数据库:它被关联至其它数据库或正在迁移数据 | -| 5056 | 集群中BE节点不足 | -| 5056 | 集群内已存在指定数目的BE节点 | -| 5059 | 集群中存在处于下线状态的BE节点 | -| 5062 | 不正确的群集名称(名称'default_cluster'是保留名称) | -| 5063 | 类型名不正确 | -| 5064 | 通用错误提示 | -| 5063 | Colocate功能已被管理员禁用 | -| 5063 | colocate数据表不存在 | -| 5063 | Colocate表必须是OLAP表 | -| 5063 | Colocate表应该具有同样的副本数目 | -| 5063 | Colocate表应该具有同样的分桶数目 | -| 5063 | Colocate表的分区列数目必须一致 | -| 5063 | Colocate表的分区列的数据类型必须一致 | -| 5064 | 指定表不是colocate表 | -| 5065 | 指定的操作是无效的 | -| 5065 | 指定的时间单位是非法的,正确的单位包括:HOUR / DAY / WEEK / MONTH | -| 5066 | 动态分区起始值应该小于0 | -| 5066 | 动态分区起始值不是有效的数字 | -| 5066 | 动态分区结束值应该大于0 | -| 5066 | 动态分区结束值不是有效的数字 | -| 5066 | 动态分区结束值为空 | -| 5067 | 动态分区分桶数应该大于0 | -| 5067 | 动态分区分桶值不是有效的数字 | -| 5066 | 动态分区分桶值为空 | -| 5068 | 是否允许动态分区的值不是有效的布尔值:true或者false | -| 5069 | 指定的动态分区名前缀是非法的 | -| 5070 | 指定的操作被禁止了 | -| 5071 | 动态分区副本数应该大于0 | -| 5072 | 动态分区副本值不是有效的数字 | -| 5073 | 原始创建表stmt为空 | -| 5074 | 创建历史动态分区参数:create_history_partition无效,期望的是:true或者false | -| 5076 | 指定的保留历史分区时间段为空 | -| 5077 | 指定的保留历史分区时间段无效 | -| 5078 | 指定的保留历史分区时间段必须是成对的时间 | -| 5079 | 指定的保留历史分区时间段对应位置的第一个时间比第二个时间大(起始时间大于结束时间) | - diff --git a/docs/zh-CN/administrator-guide/operation/metadata-operation.md b/docs/zh-CN/administrator-guide/operation/metadata-operation.md deleted file mode 100644 index b2534325d7..0000000000 --- a/docs/zh-CN/administrator-guide/operation/metadata-operation.md +++ /dev/null @@ -1,403 +0,0 @@ ---- -{ - "title": "元数据运维", - "language": "zh-CN" -} ---- - - - -# 元数据运维 - -本文档主要介绍在实际生产环境中,如何对 Doris 的元数据进行管理。包括 FE 节点建议的部署方式、一些常用的操作方法、以及常见错误的解决方法。 - -在阅读本文当前,请先阅读 [Doris 元数据设计文档](../../internal/metadata-design.md) 了解 Doris 元数据的工作原理。 - -## 重要提示 - -* 当前元数据的设计是无法向后兼容的。即如果新版本有新增的元数据结构变动(可以查看 FE 代码中的 `FeMetaVersion.java` 文件中是否有新增的 VERSION),那么在升级到新版本后,通常是无法再回滚到旧版本的。所以,在升级 FE 之前,请务必按照 [升级文档](../../installing/upgrade.md) 中的操作,测试元数据兼容性。 - -## 元数据目录结构 - -我们假设在 fe.conf 中指定的 `meta_dir` 的路径为 `/path/to/palo-meta`。那么一个正常运行中的 Doris 集群,元数据的目录结构应该如下: - -``` -/path/to/palo-meta/ - |-- bdb/ - | |-- 00000000.jdb - | |-- je.config.csv - | |-- je.info.0 - | |-- je.info.0.lck - | |-- je.lck - | `-- je.stat.csv - `-- image/ - |-- ROLE - |-- VERSION - `-- image.xxxx -``` - -1. bdb 目录 - - 我们将 [bdbje](https://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html) 作为一个分布式的 kv 系统,存放元数据的 journal。这个 bdb 目录相当于 bdbje 的 “数据目录”。 - - 其中 `.jdb` 后缀的是 bdbje 的数据文件。这些数据文件会随着元数据 journal 的不断增多而越来越多。当 Doris 定期做完 image 后,旧的日志就会被删除。所以正常情况下,这些数据文件的总大小从几 MB 到几 GB 不等(取决于使用 Doris 的方式,如导入频率等)。当数据文件的总大小大于 10GB,则可能需要怀疑是否是因为 image 没有成功,或者分发 image 失败导致的历史 journal 一直无法删除。 - - `je.info.0` 是 bdbje 的运行日志。这个日志中的时间是 UTC+0 时区的。我们可能在后面的某个版本中修复这个问题。通过这个日志,也可以查看一些 bdbje 的运行情况。 - -2. 
image 目录 - - image 目录用于存放 Doris 定期生成的元数据镜像文件。通常情况下,你会看到有一个 `image.xxxxx` 的镜像文件。其中 `xxxxx` 是一个数字。这个数字表示该镜像包含 `xxxxx` 号之前的所有元数据 journal。而这个文件的生成时间(通过 `ls -al` 查看即可)通常就是镜像的生成时间。 - - 你也可能会看到一个 `image.ckpt` 文件。这是一个正在生成的元数据镜像。通过 `du -sh` 命令应该可以看到这个文件大小在不断变大,说明镜像内容正在写入这个文件。当镜像写完后,会自动重名为一个新的 `image.xxxxx` 并替换旧的 image 文件。 - - 只有角色为 Master 的 FE 才会主动定期生成 image 文件。每次生成完后,都会推送给其他非 Master 角色的 FE。当确认其他所有 FE 都收到这个 image 后,Master FE 会删除 bdbje 中旧的元数据 journal。所以,如果 image 生成失败,或者 image 推送给其他 FE 失败时,都会导致 bdbje 中的数据不断累积。 - - `ROLE` 文件记录了 FE 的类型(FOLLOWER 或 OBSERVER),是一个文本文件。 - - `VERSION` 文件记录了这个 Doris 集群的 cluster id,以及用于各个节点之间访问认证的 token,也是一个文本文件。 - - `ROLE` 文件和 `VERSION` 文件只可能同时存在,或同时不存在(如第一次启动时)。 - -## 基本操作 - -### 启动单节点 FE - -单节点 FE 是最基本的一种部署方式。一个完整的 Doris 集群,至少需要一个 FE 节点。当只有一个 FE 节点时,这个节点的类型为 Follower,角色为 Master。 - -1. 第一次启动 - - 1. 假设在 fe.conf 中指定的 `meta_dir` 的路径为 `/path/to/palo-meta`。 - 2. 确保 `/path/to/palo-meta` 已存在,权限正确,且目录为空。 - 3. 直接通过 `sh bin/start_fe.sh` 即可启动。 - 4. 启动后,你应该可以在 fe.log 中看到如下日志: - - * Palo FE starting... - * image does not exist: /path/to/palo-meta/image/image.0 - * transfer from INIT to UNKNOWN - * transfer from UNKNOWN to MASTER - * the very first time to open bdb, dbname is 1 - * start fencing, epoch number is 1 - * finish replay in xxx msec - * QE service start - * thrift server started - - 以上日志不一定严格按照这个顺序,但基本类似。 - - 5. 单节点 FE 的第一次启动通常不会遇到问题。如果你没有看到以上日志,一般来说是没有仔细按照文档步骤操作,请仔细阅读相关 wiki。 - -2. 重启 - - 1. 直接使用 `sh bin/start_fe.sh` 可以重新启动已经停止的 FE 节点。 - 2. 重启后,你应该可以在 fe.log 中看到如下日志: - - * Palo FE starting... - * finished to get cluster id: xxxx, role: FOLLOWER and node name: xxxx - * 如果重启前还没有 image 产生,则会看到: - * image does not exist: /path/to/palo-meta/image/image.0 - - * 如果重启前有 image 产生,则会看到: - * start load image from /path/to/palo-meta/image/image.xxx. is ckpt: false - * finished load image in xxx ms - - * transfer from INIT to UNKNOWN - * replayed journal id is xxxx, replay to journal id is yyyy - * transfer from UNKNOWN to MASTER - * finish replay in xxx msec - * master finish replay journal, can write now. - * begin to generate new image: image.xxxx - * start save image to /path/to/palo-meta/image/image.ckpt. is ckpt: true - * finished save image /path/to/palo-meta/image/image.ckpt in xxx ms. checksum is xxxx - * push image.xxx to other nodes. totally xx nodes, push successed xx nodes - * QE service start - * thrift server started - - 以上日志不一定严格按照这个顺序,但基本类似。 - -3. 常见问题 - - 对于单节点 FE 的部署,启停通常不会遇到什么问题。如果有问题,请先参照相关 wiki,仔细核对你的操作步骤。 - -### 添加 FE - -添加 FE 流程在 [部署和升级文档](../../installing/install-deploy.md#增加%20FE%20节点) 有详细介绍,不再赘述。这里主要说明一些注意事项,以及常见问题。 - -1. 注意事项 - - * 在添加新的 FE 之前,一定先确保当前的 Master FE 运行正常(连接是否正常,JVM 是否正常,image 生成是否正常,bdbje 数据目录是否过大等等) - * 第一次启动新的 FE,一定确保添加了 `--helper` 参数指向 Master FE。再次启动时可不用添加 `--helper`。(如果指定了 `--helper`,FE 会直接询问 helper 节点自己的角色,如果没有指定,FE会尝试从 `palo-meta/image/` 目录下的 `ROLE` 和 `VERSION` 文件中获取信息)。 - * 第一次启动新的 FE,一定确保这个 FE 的 `meta_dir` 已经创建、权限正确且为空。 - * 启动新的 FE,和执行 `ALTER SYSTEM ADD FOLLOWER/OBSERVER` 语句在元数据添加 FE,这两个操作的顺序没有先后要求。如果先启动了新的 FE,而没有执行语句,则新的 FE 日志中会一直滚动 `current node is not added to the group. please add it first.` 字样。当执行语句后,则会进入正常流程。 - * 请确保前一个 FE 添加成功后,再添加下一个 FE。 - * 建议直接连接到 MASTER FE 执行 `ALTER SYSTEM ADD FOLLOWER/OBSERVER` 语句。 - -2. 常见问题 - - 1. 
this node is DETACHED - - 当第一次启动一个待添加的 FE 时,如果 Master FE 上的 palo-meta/bdb 中的数据很大,则可能在待添加的 FE 日志中看到 `this node is DETACHED.` 字样。这时,bdbje 正在复制数据,你可以看到待添加的 FE 的 `bdb/` 目录正在变大。这个过程通常会在数分钟不等(取决于 bdbje 中的数据量)。之后,fe.log 中可能会有一些 bdbje 相关的错误堆栈信息。如果最终日志中显示 `QE service start` 和 `thrift server started`,则通常表示启动成功。可以通过 mysql-client 连接这个 FE 尝试操作。如果没有出现这些字样,则可能是 bdbje 复制日志超时等问题。这时,直接再次重启这个 FE,通常即可解决问题。 - - 2. 各种原因导致添加失败 - - * 如果添加的是 OBSERVER,因为 OBSERVER 类型的 FE 不参与元数据的多数写,理论上可以随意启停。因此,对于添加 OBSERVER 失败的情况。可以直接杀死 OBSERVER FE 的进程,清空 OBSERVER 的元数据目录后,重新进行一遍添加流程。 - - * 如果添加的是 FOLLOWER,因为 FOLLOWER 是参与元数据多数写的。所以有可能FOLLOWER 已经加入 bdbje 选举组内。如果这时只有两个 FOLLOWER 节点(包括 MASTER),那么停掉一个 FE,可能导致另一个 FE 也因无法进行多数写而退出。此时,我们应该先通过 `ALTER SYSTEM DROP FOLLOWER` 命令,从元数据中删除新添加的 FOLLOWER 节点,然后再杀死 FOLLOWER 进程,清空元数据,重新进行一遍添加流程。 - - -### 删除 FE - -通过 `ALTER SYSTEM DROP FOLLOWER/OBSERVER` 命令即可删除对应类型的 FE。以下有几点注意事项: - -* 对于 OBSERVER 类型的 FE,直接 DROP 即可,无风险。 - -* 对于 FOLLOWER 类型的 FE。首先,应保证在有奇数个 FOLLOWER 的情况下(3个或以上),开始删除操作。 - - 1. 如果删除非 MASTER 角色的 FE,建议连接到 MASTER FE,执行 DROP 命令,再杀死进程即可。 - 2. 如果要删除 MASTER FE,先确认有奇数个 FOLLOWER FE 并且运行正常。然后先杀死 MASTER FE 的进程。这时会有某一个 FE 被选举为 MASTER。在确认剩下的 FE 运行正常后,连接到新的 MASTER FE,执行 DROP 命令删除之前老的 MASTER FE 即可。 - -## 高级操作 - -### 故障恢复 - -FE 有可能因为某些原因出现无法启动 bdbje、FE 之间无法同步等问题。现象包括无法进行元数据写操作、没有 MASTER 等等。这时,我们需要手动操作来恢复 FE。手动恢复 FE 的大致原理,是先通过当前 `meta_dir` 中的元数据,启动一个新的 MASTER,然后再逐台添加其他 FE。请严格按照如下步骤操作: - -1. 首先,停止所有 FE 进程,同时停止一切业务访问。保证在元数据恢复期间,不会因为外部访问导致其他不可预期的问题。 - -2. 确认哪个 FE 节点的元数据是最新: - - * 首先,**务必先备份所有 FE 的 `meta_dir` 目录。** - * 通常情况下,Master FE 的元数据是最新的。可以查看 `meta_dir/image` 目录下,image.xxxx 文件的后缀,数字越大,则表示元数据越新。 - * 通常,通过比较所有 FOLLOWER FE 的 image 文件,找出最新的元数据即可。 - * 之后,我们要使用这个拥有最新元数据的 FE 节点,进行恢复。 - * 如果使用 OBSERVER 节点的元数据进行恢复会比较麻烦,建议尽量选择 FOLLOWER 节点。 - -3. 以下操作都在由第2步中选择出来的 FE 节点上进行。 - - 1. 如果该节点是一个 OBSERVER,先将 `meta_dir/image/ROLE` 文件中的 `role=OBSERVER` 改为 `role=FOLLOWER`。(从 OBSERVER 节点恢复会比较麻烦,先按这里的步骤操作,后面会有单独说明) - 2. 在 fe.conf 中添加配置:`metadata_failure_recovery=true`。 - 3. 执行 `sh bin/start_fe.sh` 启动这个 FE。 - 4. 如果正常,这个 FE 会以 MASTER 的角色启动,类似于前面 `启动单节点 FE` 一节中的描述。在 fe.log 应该会看到 `transfer from XXXX to MASTER` 等字样。 - 5. 启动完成后,先连接到这个 FE,执行一些查询导入,检查是否能够正常访问。如果不正常,有可能是操作有误,建议仔细阅读以上步骤,用之前备份的元数据再试一次。如果还是不行,问题可能就比较严重了。 - 6. 如果成功,通过 `show frontends;` 命令,应该可以看到之前所添加的所有 FE,并且当前 FE 是 master。 - 7. 将 fe.conf 中的 `metadata_failure_recovery=true` 配置项删除,或者设置为 `false`,然后重启这个 FE(**重要**)。 - - - > 如果你是从一个 OBSERVER 节点的元数据进行恢复的,那么完成如上步骤后,通过 `show frontends;` 语句你会发现,当前这个 FE 的角色为 OBSERVER,但是 `IsMaster` 显示为 `true`。这是因为,这里看到的 “OBSERVER” 是记录在 Doris 的元数据中的,而是否是 master,是记录在 bdbje 的元数据中的。因为我们是从一个 OBSERVER 节点恢复的,所以这里出现了不一致。请按如下步骤修复这个问题(这个问题我们会在之后的某个版本修复): - - > 1. 先把除了这个 “OBSERVER” 以外的所有 FE 节点 DROP 掉。 - > 2. 通过 `ADD FOLLOWER` 命令,添加一个新的 FOLLOWER FE,假设在 hostA 上。 - > 3. 在 hostA 上启动一个全新的 FE,通过 `--helper` 的方式加入集群。 - > 4. 启动成功后,通过 `show frontends;` 语句,你应该能看到两个 FE,一个是之前的 OBSERVER,一个是新添加的 FOLLOWER,并且 OBSERVER 是 master。 - > 5. 确认这个新的 FOLLOWER 是可以正常工作之后,用这个新的 FOLLOWER 的元数据,重新执行一遍故障恢复操作。 - > 6. 以上这些步骤的目的,其实就是人为的制造出一个 FOLLOWER 节点的元数据,然后用这个元数据,重新开始故障恢复。这样就避免了从 OBSERVER 恢复元数据所遇到的不一致的问题。 - - > `metadata_failure_recovery=true` 的含义是,清空 "bdbje" 的元数据。这样 bdbje 就不会再联系之前的其他 FE 了,而作为一个独立的 FE 启动。这个参数只有在恢复启动时才需要设置为 true。恢复完成后,一定要设置为 false,否则一旦重启,bdbje 的元数据又会被清空,导致其他 FE 无法正常工作。 - -4. 第3步执行成功后,我们再通过 `ALTER SYSTEM DROP FOLLOWER/OBSERVER` 命令,将之前的其他的 FE 从元数据删除后,按加入新 FE 的方式,重新把这些 FE 添加一遍。 - -5. 如果以上操作正常,则恢复完毕。 - -### FE 类型变更 - -如果你需要将当前已有的 FOLLOWER/OBSERVER 类型的 FE,变更为 OBSERVER/FOLLOWER 类型,请先按照前面所述的方式删除 FE,再添加对应类型的 FE 即可 - -### FE 迁移 - -如果你需要将一个 FE 从当前节点迁移到另一个节点,分以下几种情况。 - -1. 
非 MASTER 节点的 FOLLOWER,或者 OBSERVER 迁移 - - 直接添加新的 FOLLOWER/OBSERVER 成功后,删除旧的 FOLLOWER/OBSERVER 即可。 - -2. 单节点 MASTER 迁移 - - 当只有一个 FE 时,参考 `故障恢复` 一节。将 FE 的 palo-meta 目录拷贝到新节点上,按照 `故障恢复` 一节中,步骤3的方式启动新的 MASTER - -3. 一组 FOLLOWER 从一组节点迁移到另一组新的节点 - - 在新的节点上部署 FE,通过添加 FOLLOWER 的方式先加入新节点。再逐台 DROP 掉旧节点即可。在逐台 DROP 的过程中,MASTER 会自动选择在新的 FOLLOWER 节点上。 - -### 更换 FE 端口 - -FE 目前有以下几个端口 - -* edit_log_port:bdbje 的通信端口 -* http_port:http 端口,也用于推送 image -* rpc_port:FE 的 thrift server port -* query_port:Mysql 连接端口 - -1. edit_log_port - - 如果需要更换这个端口,则需要参照 `故障恢复` 一节中的操作,进行恢复。因为该端口已经被持久化到 bdbje 自己的元数据中(同时也记录在 Doris 自己的元数据中),需要通过设置 `metadata_failure_recovery=true` 来清空 bdbje 的元数据。 - -2. http_port - - 所有 FE 的 http_port 必须保持一致。所以如果要修改这个端口,则所有 FE 都需要修改并重启。修改这个端口,在多 FOLLOWER 部署的情况下会比较复杂(涉及到鸡生蛋蛋生鸡的问题...),所以不建议有这种操作。如果必须,直接按照 `故障恢复` 一节中的操作吧。 - -3. rpc_port - - 修改配置后,直接重启 FE 即可。Master FE 会通过心跳将新的端口告知 BE。只有 Master FE 的这个端口会被使用。但仍然建议所有 FE 的端口保持一致。 - -4. query_port - - 修改配置后,直接重启 FE 即可。这个只影响到 mysql 的连接目标。 - - -### 从 FE 内存中恢复元数据 - -在某些极端情况下,磁盘上 image 文件可能会损坏,但是内存中的元数据是完好的,此时我们可以先从内存中 dump 出元数据,再替换掉磁盘上的 image 文件,来恢复元数据,整个**不停查询服务**的操作步骤如下: -1. 集群停止所有 Load,Create,Alter 操作 -2. 执行以下命令,从 Master FE 内存中 dump 出元数据:(下面称为 image_mem) -``` -curl -u $root_user:$password http://$master_hostname:8030/dump -``` -3. 执行以下命令,验证生成的 image_mem 文件的完整性和正确性: -``` -sh start_fe.sh --image path_to_image_mem -``` -> 注意:`path_to_image_mem` 是 image_mem 文件的路径。 -> -> 如果文件有效会输出 `Load image success. Image file /absolute/path/to/image.xxxxxx is valid`。 -> -> 如果文件无效会输出 `Load image failed. Image file /absolute/path/to/image.xxxxxx is invalid`。 -4. 依次用 image_mem 文件替换掉 OBSERVER/FOLLOWER FE 节点上`meta_dir/image`目录下的 image 文件,重启 FOLLOWER FE 节点, -确认元数据和查询服务都正常 -5. 用 image_mem 文件替换掉 Master FE 节点上`meta_dir/image`目录下的 image 文件,重启 Master FE 节点, -确认 FE Master 切换正常, Master FE 节点可以通过 checkpoint 正常生成新的 image 文件 -6. 集群恢复所有 Load,Create,Alter 操作 - -**注意:如果 Image 文件很大,整个操作过程耗时可能会很长,所以在此期间,要确保 Master FE 不会通过 checkpoint 生成新的 image 文件。 -当观察到 Master FE 节点上 `meta_dir/image`目录下的 `image.ckpt` 文件快和 `image.xxx` 文件一样大时,可以直接删除掉`image.ckpt` 文件。** - -### 查看 BDBJE 中的数据 - -FE 的元数据日志以 Key-Value 的方式存储在 BDBJE 中。某些异常情况下,可能因为元数据错误而无法启动 FE。在这种情况下,Doris 提供一种方式可以帮助用户查询 BDBJE 中存储的数据,以方便进行问题排查。 - -首先需在 fe.conf 中增加配置:`enable_bdbje_debug_mode=true`,之后通过 `sh start_fe.sh --daemon` 启动 FE。 - -此时,FE 将进入 debug 模式,仅会启动 http server 和 MySQL server,并打开 BDBJE 实例,但不会进行任何元数据的加载及后续其他启动流程。 - -这是,我们可以通过访问 FE 的 web 页面,或通过 MySQL 客户端连接到 Doris 后,通过 `show proc /bdbje;` 来查看 BDBJE 中存储的数据。 - -``` -mysql> show proc "/bdbje"; -+----------+---------------+---------+ -| DbNames | JournalNumber | Comment | -+----------+---------------+---------+ -| 110589 | 4273 | | -| epochDB | 4 | | -| metricDB | 430694 | | -+----------+---------------+---------+ -``` - -第一级目录会展示 BDBJE 中所有的 database 名称,以及每个 database 中的 entry 数量。 - -``` -mysql> show proc "/bdbje/110589"; -+-----------+ -| JournalId | -+-----------+ -| 1 | -| 2 | - -... 
-| 114858 | -| 114859 | -| 114860 | -| 114861 | -+-----------+ -4273 rows in set (0.06 sec) -``` - -进入第二级,则会罗列指定 database 下的所有 entry 的 key。 - -``` -mysql> show proc "/bdbje/110589/114861"; -+-----------+--------------+---------------------------------------------+ -| JournalId | OpType | Data | -+-----------+--------------+---------------------------------------------+ -| 114861 | OP_HEARTBEAT | org.apache.doris.persist.HbPackage@6583d5fb | -+-----------+--------------+---------------------------------------------+ -1 row in set (0.05 sec) -``` - -第三级则可以展示指定 key 的 value 信息。 - -## 最佳实践 - -FE 的部署推荐,在 [安装与部署文档](../../installing/install-deploy.md) 中有介绍,这里再做一些补充。 - -* **如果你并不十分了解 FE 元数据的运行逻辑,或者没有足够 FE 元数据的运维经验,我们强烈建议在实际使用中,只部署一个 FOLLOWER 类型的 FE 作为 MASTER,其余 FE 都是 OBSERVER,这样可以减少很多复杂的运维问题!** 不用过于担心 MASTER 单点故障导致无法进行元数据写操作。首先,如果你配置合理,FE 作为 java 进程很难挂掉。其次,如果 MASTER 磁盘损坏(概率非常低),我们也可以用 OBSERVER 上的元数据,通过 `故障恢复` 的方式手动恢复。 - -* FE 进程的 JVM 一定要保证足够的内存。我们**强烈建议** FE 的 JVM 内存至少在 10GB 以上,推荐 32GB 至 64GB。并且部署监控来监控 JVM 的内存使用情况。因为如果FE出现OOM,可能导致元数据写入失败,造成一些**无法恢复**的故障! - -* FE 所在节点要有足够的磁盘空间,以防止元数据过大导致磁盘空间不足。同时 FE 日志也会占用十几G 的磁盘空间。 - -## 其他常见问题 - -1. fe.log 中一直滚动 `meta out of date. current time: xxx, synchronized time: xxx, has log: xxx, fe type: xxx` - - 这个通常是因为 FE 无法选举出 Master。比如配置了 3 个 FOLLOWER,但是只启动了一个 FOLLOWER,则这个 FOLLOWER 会出现这个问题。通常,只要把剩余的 FOLLOWER 启动起来就可以了。如果启动起来后,仍然没有解决问题,那么可能需要按照 `故障恢复` 一节中的方式,手动进行恢复。 - -2. `Clock delta: xxxx ms. between Feeder: xxxx and this Replica exceeds max permissible delta: xxxx ms.` - - bdbje 要求各个节点之间的时钟误差不能超过一定阈值。如果超过,节点会异常退出。我们默认设置的阈值为 5000 ms,由 FE 的参数 `max_bdbje_clock_delta_ms` 控制,可以酌情修改。但我们建议使用 ntp 等时钟同步方式保证 Doris 集群各主机的时钟同步。 - - -3. `image/` 目录下的镜像文件很久没有更新 - - Master FE 会默认每 50000 条元数据 journal,生成一个镜像文件。在一个频繁使用的集群中,通常每隔半天到几天的时间,就会生成一个新的 image 文件。如果你发现 image 文件已经很久没有更新了(比如超过一个星期),则可以顺序的按照如下方法,查看具体原因: - - 1. 在 Master FE 的 fe.log 中搜索 `memory is not enough to do checkpoint. Committed memroy xxxx Bytes, used memory xxxx Bytes.` 字样。如果找到,则说明当前 FE 的 JVM 内存不足以用于生成镜像(通常我们需要预留一半的 FE 内存用于 image 的生成)。那么需要增加 JVM 的内存并重启 FE 后,再观察。每次 Master FE 重启后,都会直接生成一个新的 image。也可用这种重启方式,主动地生成新的 image。注意,如果是多 FOLLOWER 部署,那么当你重启当前 Master FE 后,另一个 FOLLOWER FE 会变成 MASTER,则后续的 image 生成会由新的 Master 负责。因此,你可能需要修改所有 FOLLOWER FE 的 JVM 内存配置。 - - 2. 在 Master FE 的 fe.log 中搜索 `begin to generate new image: image.xxxx`。如果找到,则说明开始生成 image 了。检查这个线程的后续日志,如果出现 `checkpoint finished save image.xxxx`,则说明 image 写入成功。如果出现 `Exception when generate new image file`,则生成失败,需要查看具体的错误信息。 - - -4. `bdb/` 目录的大小非常大,达到几个G或更多 - - 如果在排除无法生成新的 image 的错误后,bdb 目录在一段时间内依然很大。则可能是因为 Master FE 推送 image 不成功。可以在 Master FE 的 fe.log 中搜索 `push image.xxxx to other nodes. totally xx nodes, push successed yy nodes`。如果 yy 比 xx 小,则说明有的 FE 没有被推送成功。可以在 fe.log 中查看到具体的错误 `Exception when pushing image file. url = xxx`。 - - 同时,你也可以在 FE 的配置文件中添加配置:`edit_log_roll_num=xxxx`。该参数设定了每多少条元数据 journal,做一次 image。默认是 50000。可以适当改小这个数字,使得 image 更加频繁,从而加速删除旧的 journal。 - -5. FOLLOWER FE 接连挂掉 - - 因为 Doris 的元数据采用多数写策略,即一条元数据 journal 必须至少写入多数个 FOLLOWER FE 后(比如 3 个 FOLLOWER,必须写成功 2 个),才算成功。而如果写入失败,FE 进程会主动退出。那么假设有 A、B、C 三个 FOLLOWER,C 先挂掉,然后 B 再挂掉,那么 A 也会跟着挂掉。所以如 `最佳实践` 一节中所述,如果你没有丰富的元数据运维经验,不建议部署多 FOLLOWER。 - -6. fe.log 中出现 `get exception when try to close previously opened bdb database. ignore it` - - 如果后面有 `ignore it` 字样,通常无需处理。如果你有兴趣,可以在 `BDBEnvironment.java` 搜索这个错误,查看相关注释说明。 - -7. 从 `show frontends;` 看,某个 FE 的 `Join` 列为 `true`,但是实际该 FE 不正常 - - 通过 `show frontends;` 查看到的 `Join` 信息。该列如果为 `true`,仅表示这个 FE **曾经加入过** 集群。并不能表示当前仍然正常的存在于集群中。如果为 `false`,则表示这个 FE **从未加入过** 集群。 - -8. 
关于 FE 的配置 `master_sync_policy`, `replica_sync_policy` 和 `txn_rollback_limit` - - `master_sync_policy` 用于指定当 Leader FE 写元数据日志时,是否调用 fsync(), `replica_sync_policy` 用于指定当 FE HA 部署时,其他 Follower FE 在同步元数据时,是否调用 fsync()。在早期的 Doris 版本中,这两个参数默认是 `WRITE_NO_SYNC`,即都不调用 fsync()。在最新版本的 Doris 中,默认已修改为 `SYNC`,即都调用 fsync()。调用 fsync() 会显著降低元数据写盘的效率。在某些环境下,IOPS 可能降至几百,延迟增加到2-3ms(但对于 Doris 元数据操作依然够用)。因此我们建议以下配置: - - 1. 对于单 Follower FE 部署,`master_sync_policy` 设置为 `SYNC`,防止 FE 系统宕机导致元数据丢失。 - 2. 对于多 Follower FE 部署,可以将 `master_sync_policy` 和 `replica_sync_policy` 设为 `WRITE_NO_SYNC`,因为我们认为多个系统同时宕机的概率非常低。 - - 如果在单 Follower FE 部署中,`master_sync_policy` 设置为 `WRITE_NO_SYNC`,则可能出现 FE 系统宕机导致元数据丢失。这时如果有其他 Observer FE 尝试重启时,可能会报错: - - ``` - Node xxx must rollback xx total commits(numPassedDurableCommits of which were durable) to the earliest point indicated by transaction xxxx in order to rejoin the replication group, but the transaction rollback limit of xxx prohibits this. - ``` - - 意思有部分已经持久化的事务需要回滚,但条数超过上限。这里我们的默认上限是 100,可以通过设置 `txn_rollback_limit` 改变。该操作仅用于尝试正常启动 FE,但已丢失的元数据无法恢复。 diff --git a/docs/zh-CN/administrator-guide/operation/multi-tenant.md b/docs/zh-CN/administrator-guide/operation/multi-tenant.md deleted file mode 100644 index dc818cd3ff..0000000000 --- a/docs/zh-CN/administrator-guide/operation/multi-tenant.md +++ /dev/null @@ -1,239 +0,0 @@ ---- -{ - "title": "多租户(弃用)", - "language": "zh-CN" -} ---- - - - -# 多租户(已弃用) - -该功能已弃用。新方案请参阅:[多租户和资源划分](../multi-tenant.md)。 - -## 背景 -Doris 作为一款 PB 级别的在线报表与多维分析数据库,对外通过开放云提供云端的数据库服务,并且对于每个云上的客户都单独部署了一套物理集群。对内,一套物理集群部署了多个业务,对于隔离性要求比较高的业务单独搭建了集群。针对以上存在几点问题: - -- 部署多套物理集群维护代价大(升级、功能上线、bug修复)。 -- 一个用户的查询或者查询引起的bug经常会影响其他用户。 -- 实际生产环境单机只能部署一个BE进程。而多个BE可以更好的解决胖节点问题。并且对于join、聚合操作可以提供更高的并发度。 - -综合以上三点,Doris需要新的多租户方案,既能做到较好的资源隔离和故障隔离,同时也能减少维护的代价,满足公有云和私有云的需求。 - -## 设计原则 - -- 使用简单 -- 开发代价小 -- 方便现有集群的迁移 - -## 名词解释 - -- FE: Frontend,即 Doris 中用于元数据管理即查询规划的模块。 -- BE: Backend,即 Doris 中用于存储和查询数据的模块。 -- Master: FE 的一种角色。一个Doris集群只有一个Master,其他的FE为Observer或者Follower。 -- instance:一个 BE 进程即是一个 instance。 -- host:单个物理机 -- cluster:即一个集群,由多个instance组成。 -- 租户:一个cluster属于一个租户。cluster和租户之间是一对一关系。 -- database:一个用户创建的数据库 - -## 主要思路 - -- 一个host上部署多个BE的instance,在进程级别做资源隔离。 -- 多个instance形成一个cluster,一个cluster分配给一个业务独立的的租户。 -- FE增加cluster这一级并负责cluster的管理。 -- CPU,IO,内存等资源隔离采用cgroup。 - -## 设计方案 - -为了能够达到隔离的目的,引入了**虚拟cluster**的概念。 - -1. cluster表示一个虚拟的集群,由多个BE的instance组成。多个cluster共享FE。 -2. 一个host上可以启动多个instance。cluster创建时,选取任意指定数量的instance,组成一个cluster。 -3. 创建cluster的同时,会创建一个名为superuser的账户,隶属于该cluster。superuser可以对cluster进行管理、创建数据库、分配权限等。 -4. 
Doris启动后,会创建一个默认的cluster:default_cluster。如果用户不希望使用多cluster的功能,则会提供这个默认的cluster,并隐藏多cluster的其他操作细节。 - -具体架构如下图: -![](/images/multi_tenant_arch.png) - -## SQL 接口 - -- 登录 - - 默认集群登录名: user_name@default_cluster 或者 user_name - - 自定义集群登录名:user_name@cluster_name - - `mysqlclient -h host -P port -u user_name@cluster_name -p password` - -- 添加、删除、下线(decommission)以及取消下线BE - - `ALTER SYSTEM ADD BACKEND "host:port"` - `ALTER SYSTEM DROP BACKEND "host:port"` - `ALTER SYSTEM DECOMMISSION BACKEND "host:port"` - `CANCEL DECOMMISSION BACKEND "host:port"` - - 强烈建议使用 DECOMMISSION 而不是 DROP 来删除 BACKEND。DECOMMISSION 操作会首先将需要下线节点上的数据拷贝到集群内其他instance上。之后,才会真正下线。 - -- 创建集群,并指定superuser账户的密码 - - `CREATE CLUSTER cluster_name PROPERTIES ("instance_num" = "10") identified by "password"` - -- 进入一个集群 - - `ENTER cluster_name` - -- 集群扩容、缩容 - - `ALTER CLUSTER cluster_name PROPERTIES ("instance_num" = "10")` - - 当指定的实例个数多于cluster现有be的个数,则为扩容,如果少于则为缩容。 - -- 链接、迁移db - - `LINK DATABASE src_cluster_name.db_name dest_cluster_name.db_name` - - 软链一个cluster的db到另外一个cluster的db ,对于需要临时访问其他cluster的db却不需要进行实际数据迁移的用户可以采用这种方式。 - - `MIGRATE DATABASE src_cluster_name.db_name dest_cluster_name.db_name` - - 如果需要对db进行跨cluster的迁移,在链接之后,执行migrate对数据进行实际的迁移。 - - 迁移不影响当前两个db的查询、导入等操作,这是一个异步的操作,可以通过`SHOW MIGRATIONS`查看迁移的进度。 - -- 删除集群 - - `DROP CLUSTER cluster_name` - - 删除集群,要求先手动删除的集群内所有database。 - -- 其他 - - `SHOW CLUSTERS` - - 展示系统内已经创建的集群。只有root用户有该权限。 - - `SHOW BACKENDS` - - 查看集群内的BE instance。 - - `SHOW MIGRATIONS` - - 展示当前正在进行的db迁移任务。执行完db的迁移后可以通过此命令查看迁移的进度。 - -## 详细设计 - -1. 命名空间隔离 - - 为了引入多租户,需要对系统内的cluster之间的命名空间进行隔离。 - - Doris现有的元数据采用的是image + journal 的方式(元数据的设计见相关文档)。Doris会把涉及元数据的操作的记录为一个 journal (操作日志),然后定时的按照**图1**的方式写成image,加载的时候按照写入的顺序读即可。但是这样就带来一个问题已经写入的格式不容易修改,比如记录数据分布的元数据格式为:database+table+tablet+replica 嵌套,如果按照以往的方式要做cluster之间的命名空间隔离,则需要在database上增加一层cluster,内部元数据的层级变为:cluster+database+table+tablet+replica,如**图2**所示。但加一层带来的问题有: - - - 增加一层带来的元数据改动,不兼容,需要按照图2的方式cluster+db+table+tablet+replica层级写,这样就改变了以往的元数据组织方式,老版本的升级会比较麻烦,比较理想的方式是按照图3在现有元数据的格式下顺序写入cluster的元数据。 - - - 代码里所有用到db、user等,都需要加一层cluster,一工作量大改动的地方多,层级深,多数代码都获取db,现有功能几乎都要改一遍,并且需要在db的锁的基础上嵌套一层cluster的锁。 - - ![](/images/palo_meta.png) - - 综上这里采用了一种通过给db、user名加前缀的方式去隔离内部因为cluster之间db、user名字冲突的问题。 - - 如下,所有的sql输入涉及db名、user名的,都需要根据自己所在的cluster来拼写db、user的全名。 - - ![](/images/cluster_namaspace.png) - - 采用这种方式以上两个问题不再有。元数据的组织方式也比较简单。即采用**图3**每个cluster记录下属于自己cluster的db、user,以及节点即可。 - -2. BE 节点管理 - - 每个cluster都有属于自己的一组instance,可以通过`SHOW BACKENDS`查看,为了区分出instance属于哪个cluster以及使用情况,BE引入了多个状态: - - - free:当一个BE节点被加入系统内,此时be不属于任何cluster的时候处于空闲状态 - - using:当创建集群、或者扩容被选取到一个cluster内则处于使用中。 - - cluster decommission:如果执行缩容量,则正在执行缩容的be处于此状态。结束后,be状态变为free。 - - system decommission:be正在下线中。下线完成后,该be将会被永久删除。 - - 只有root用户可以通过`SHOW PROC "/backends"`中cluster这一项查看集群内所有be的是否被使用。为空则为空闲,否则为使用中。`SHOW BACKENDS`只能看到所在cluster的节点。以下是be节点状态变化的示意图。 - - ![](/images/backend_state.png) - -3. 创建集群 - - 只有root用户可以创建一个cluster,并指定任意数量的BE instance。 - - 支持在相同机器上选取多个instance。选择instance的大致原则是:尽可能选取不同机器上的be并且使所有机器上使用的be数尽可能均匀。 - - 对于使用来讲,每一个user、db都属于一个cluster(root除外)。为了创建user、db,首先需要进入一个cluster。在创建cluster的时候系统会默认生成这个cluster的管理员,即superuser账户。superuser具有在所属cluster内创建db、user,以及查看be节点数的权限。所有的非root用户登录必须指定一个cluster,即`user_name@cluster_name`。 - - 只有root用户可以通过`SHOW CLUSTER`查看系统内所有的cluster,并且可以通过@不同的集群名来进入不同的cluster。对于除了root之外的用户cluster都是不可见的。 - - 为了兼容老版本Doris内置了一个名字叫做default_cluster的集群,这个名字在创建集群的时候不能使用。 - - ![](/images/user_authority.png) - -4. 集群扩容 - - 集群扩容的流程同创建集群。会优先选取不在集群之外的host上的BE instance。选取的原则同创建集群。 - -5. 
集群缩容、CLUSTER DECOMMISSION - - 用户可以通过设置 cluster 的 instance num 来进行集群缩容。 - - 集群的缩容会优先在BE instance 数量最多的 host 上选取 instance 进行下线。 - - 用户也可以直接使用 `ALTER CLUSTER DECOMMISSION BACKEND` 来指定BE,进行集群缩容。 - -![](/images/replica_recover.png) - -6. 建表 - - 为了保证高可用,每个分片的副本必需在不同的机器上。所以建表时,选择副本所在be的策略为在每个host上随机选取一个be。然后从这些be中随机选取所需副本数量的be。总体上做到每个机器上分片分布均匀。 - - 因此,假如需要创建一个3副本的分片,即使cluster包含3个或以上的instance,但是只有2个或以下的host,依然不能创建该分片。 - -7. 负载均衡 - - 负载均衡的粒度为cluster级别,cluster之间不做负载均衡。但是在计算负载是在host一级进行的,而一个host上可能存在多个不同cluster的BE instance。 cluster内,会通过每个host上所有分片数目、存储使用率计算负载,然后把负载高的机器上的分片往负载低的机器上拷贝(详见负载均衡相关文档)。 - -8. LINK DATABASE(软链) - - 多个集群之间可以通过软链的方式访问彼此的数据。链接的级别为不同cluster的db。 - - 通过在一个cluster内,添加需要访问的其他cluster的db的信息,来访问其他cluster中的db。 - - 当查询链接的db时,所使用的计算以及存储资源为源db所在cluster的资源。 - - 被软链的db不能在源cluster中删除。只有链接的db被删除后,才可以删除源db。而删除链接db,不会删除源db。 - -9. MIGRATE DATABASE - - db可以在cluster之间进行物理迁移。 - - 要迁移db,必须先链接db。执行迁移后数据会迁移到链接的db所在的cluster,并且执行迁移后源db被删除,链接断开。 - - 数据的迁移,复用了负载均衡以及副本恢复中,复制数据的流程(详见负载均衡相关文档)。具体实现上,在执行`MIRAGTE`命令后,Doris会在元数据中,将源db的所有副本所属的cluster,修改为目的cluster。 - - Doris会定期检查集群内机器之间是否均衡、副本是否齐全、是否有多余的副本。db的迁移即借用了这个流程,在检查副本齐全的时候同时检查副本所在的be是否属于该cluster,如果不属于,则记入要恢复的副本。并且副本多余要删除的时候会优先删除cluster外的副本,然后再按照现有的策略选择:宕机的be的副本->clone的副本->版本落后的副本->负载高的host上的副本,直到副本没有多余。 - -![](/images/cluster_link_and_migrate_db.png) - -10. BE的进程隔离 - -  为了实现be进程之间实际cpu、io以及内存的隔离,需要依赖于be的部署。部署的时候需要在外围配置cgroup,把要部署的be的进程都写入cgroup。如果要实现io的物理隔离各be配置的数据存放路径需要在不同磁盘上,这里不做过多的介绍。 - diff --git a/docs/zh-CN/administrator-guide/operation/tablet-repair-and-balance.md b/docs/zh-CN/administrator-guide/operation/tablet-repair-and-balance.md deleted file mode 100644 index eb23662432..0000000000 --- a/docs/zh-CN/administrator-guide/operation/tablet-repair-and-balance.md +++ /dev/null @@ -1,775 +0,0 @@ ---- -{ - "title": "数据副本管理", - "language": "zh-CN" -} ---- - - - -# 数据副本管理 - -从 0.9.0 版本开始,Doris 引入了优化后的副本管理策略,同时支持了更为丰富的副本状态查看工具。本文档主要介绍 Doris 数据副本均衡、修复方面的调度策略,以及副本管理的运维方法。帮助用户更方便的掌握和管理集群中的副本状态。 - -> Colocation 属性的表的副本修复和均衡可以参阅 `docs/documentation/cn/administrator-guide/colocation-join.md` - -## 名词解释 - -1. Tablet:Doris 表的逻辑分片,一个表有多个分片。 -2. Replica:分片的副本,默认一个分片有3个副本。 -3. Healthy Replica:健康副本,副本所在 Backend 存活,且副本的版本完整。 -4. TabletChecker(TC):是一个常驻的后台线程,用于定期扫描所有的 Tablet,检查这些 Tablet 的状态,并根据检查结果,决定是否将 tablet 发送给 TabletScheduler。 -5. TabletScheduler(TS):是一个常驻的后台线程,用于处理由 TabletChecker 发来的需要修复的 Tablet。同时也会进行集群副本均衡的工作。 -6. TabletSchedCtx(TSC):是一个 tablet 的封装。当 TC 选择一个 tablet 后,会将其封装为一个 TSC,发送给 TS。 -7. Storage Medium:存储介质。Doris 支持对分区粒度指定不同的存储介质,包括 SSD 和 HDD。副本调度策略也是针对不同的存储介质分别调度的。 - -``` - - +--------+ +-----------+ - | Meta | | Backends | - +---^----+ +------^----+ - | | | 3. Send clone tasks - 1. Check tablets | | | - +--------v------+ +-----------------+ - | TabletChecker +--------> TabletScheduler | - +---------------+ +-----------------+ - 2. Waiting to be scheduled - - -``` - -上图是一个简化的工作流程。 - - -## 副本状态 - -一个 Tablet 的多个副本,可能因为某些情况导致状态不一致。Doris 会尝试自动修复这些状态不一致的副本,让集群尽快从错误状态中恢复。 - -**一个 Replica 的健康状态有以下几种:** - -1. BAD - - 即副本损坏。包括但不限于磁盘故障、BUG等引起的副本不可恢复的损毁状态。 - -2. VERSION\_MISSING - - 版本缺失。Doris 中每一批次导入都对应一个数据版本。而一个副本的数据由多个连续的版本组成。而由于导入错误、延迟等原因,可能导致某些副本的数据版本不完整。 - -3. HEALTHY - - 健康副本。即数据正常的副本,并且副本所在的 BE 节点状态正常(心跳正常且不处于下线过程中) - - -**一个 Tablet 的健康状态由其所有副本的状态决定,有以下几种:** - -1. REPLICA\_MISSING - - 副本缺失。即存活副本数小于期望副本数。 - -2. VERSION\_INCOMPLETE - - 存活副本数大于等于期望副本数,但其中健康副本数小于期望副本数。 - -3. REPLICA\_RELOCATING - - 拥有等于 replication num 的版本完整的存活副本数,但是部分副本所在的 BE 节点处于 unavailable 状态(比如 decommission 中) - -4. 
REPLICA\_MISSING\_IN\_CLUSTER - - 当使用多 cluster 方式时,健康副本数大于等于期望副本数,但在对应 cluster 内的副本数小于期望副本数。 - -5. REDUNDANT - - 副本冗余。健康副本都在对应 cluster 内,但数量大于期望副本数。或者有多余的 unavailable 副本。 - -6. FORCE\_REDUNDANT - - 这是一个特殊状态。只会出现在当期望副本数大于等于可用节点数时,并且 Tablet 处于副本缺失状态时出现。这种情况下,需要先删除一个副本,以保证有可用节点用于创建新副本。 - -7. COLOCATE\_MISMATCH - - 针对 Colocation 属性的表的分片状态。表示分片副本与 Colocation Group 的指定的分布不一致。 - -8. COLOCATE\_REDUNDANT - - 针对 Colocation 属性的表的分片状态。表示 Colocation 表的分片副本冗余。 - -9. HEALTHY - - 健康分片,即条件[1-8]都不满足。 - -## 副本修复 - -TabletChecker 作为常驻的后台进程,会定期检查所有分片的状态。对于非健康状态的分片,将会交给 TabletScheduler 进行调度和修复。修复的实际操作,都由 BE 上的 clone 任务完成。FE 只负责生成这些 clone 任务。 - -> 注1:副本修复的主要思想是先通过创建或补齐使得分片的副本数达到期望值,然后再删除多余的副本。 -> -> 注2:一个 clone 任务就是完成从一个指定远端 BE 拷贝指定数据到指定目的端 BE 的过程。 - -针对不同的状态,我们采用不同的修复方式: - -1. REPLICA\_MISSING/REPLICA\_RELOCATING - - 选择一个低负载的,可用的 BE 节点作为目的端。选择一个健康副本作为源端。clone 任务会从源端拷贝一个完整的副本到目的端。对于副本补齐,我们会直接选择一个可用的 BE 节点,而不考虑存储介质。 - -2. VERSION\_INCOMPLETE - - 选择一个相对完整的副本作为目的端。选择一个健康副本作为源端。clone 任务会从源端尝试拷贝缺失的版本到目的端的副本。 - -3. REPLICA\_MISSING\_IN\_CLUSTER - - 这种状态处理方式和 REPLICA\_MISSING 相同。 - -4. REDUNDANT - - 通常经过副本修复后,分片会有冗余的副本。我们选择一个冗余副本将其删除。冗余副本的选择遵从以下优先级: - 1. 副本所在 BE 已经下线 - 2. 副本已损坏 - 3. 副本所在 BE 失联或在下线中 - 4. 副本处于 CLONE 状态(该状态是 clone 任务执行过程中的一个中间状态) - 5. 副本有版本缺失 - 6. 副本所在 cluster 不正确 - 7. 副本所在 BE 节点负载高 - -5. FORCE\_REDUNDANT - - 不同于 REDUNDANT,因为此时虽然 Tablet 有副本缺失,但是因为已经没有额外的可用节点用于创建新的副本了。所以此时必须先删除一个副本,以腾出一个可用节点用于创建新的副本。 - 删除副本的顺序同 REDUNDANT。 - -6. COLOCATE\_MISMATCH - - 从 Colocation Group 中指定的副本分布 BE 节点中选择一个作为目的节点进行副本补齐。 - -7. COLOCATE\_REDUNDANT - - 删除一个非 Colocation Group 中指定的副本分布 BE 节点上的副本。 - -Doris 在选择副本节点时,不会将同一个 Tablet 的副本部署在同一个 host 的不同 BE 上。保证了即使同一个 host 上的所有 BE 都挂掉,也不会造成全部副本丢失。 - -### 调度优先级 - -TabletScheduler 里等待被调度的分片会根据状态不同,赋予不同的优先级。优先级高的分片将会被优先调度。目前有以下几种优先级。 - -1. VERY\_HIGH - - * REDUNDANT。对于有副本冗余的分片,我们优先处理。虽然逻辑上来讲,副本冗余的紧急程度最低,但是因为这种情况处理起来最快且可以快速释放资源(比如磁盘空间等),所以我们优先处理。 - * FORCE\_REDUNDANT。同上。 - -2. HIGH - - * REPLICA\_MISSING 且多数副本缺失(比如3副本丢失了2个) - * VERSION\_INCOMPLETE 且多数副本的版本缺失 - * COLOCATE\_MISMATCH 我们希望 Colocation 表相关的分片能够尽快修复完成。 - * COLOCATE\_REDUNDANT - -3. NORMAL - - * REPLICA\_MISSING 但多数存活(比如3副本丢失了1个) - * VERSION\_INCOMPLETE 但多数副本的版本完整 - * REPLICA\_RELOCATING 且多数副本需要 relocate(比如3副本有2个) - -4. 
LOW - - * REPLICA\_MISSING\_IN\_CLUSTER - * REPLICA\_RELOCATING 但多数副本 stable - -### 手动优先级 - -系统会自动判断调度优先级。但是有些时候,用户希望某些表或分区的分片能够更快的被修复。因此我们提供一个命令,用户可以指定某个表或分区的分片被优先修复: - -`ADMIN REPAIR TABLE tbl [PARTITION (p1, p2, ...)];` - -这个命令,告诉 TC,在扫描 Tablet 时,对需要优先修复的表或分区中的有问题的 Tablet,给予 VERY\_HIGH 的优先级。 - -> 注:这个命令只是一个 hint,并不能保证一定能修复成功,并且优先级也会随 TS 的调度而发生变化。并且当 Master FE 切换或重启后,这些信息都会丢失。 - -可以通过以下命令取消优先级: - -`ADMIN CANCEL REPAIR TABLE tbl [PARTITION (p1, p2, ...)];` - -### 优先级调度 - -优先级保证了损坏严重的分片能够优先被修复,提高系统可用性。但是如果高优先级的修复任务一直失败,则会导致低优先级的任务一直得不到调度。因此,我们会根据任务的运行状态,动态的调整任务的优先级,保证所有任务都有机会被调度到。 - -* 连续5次调度失败(如无法获取资源,无法找到合适的源端或目的端等),则优先级会被下调。 -* 持续 30 分钟未被调度,则上调优先级。 -* 同一 tablet 任务的优先级至少间隔 5 分钟才会被调整一次。 - -同时为了保证初始优先级的权重,我们规定,初始优先级为 VERY\_HIGH 的,最低被下调到 NORMAL。而初始优先级为 LOW 的,最多被上调为 HIGH。这里的优先级调整,也会调整用户手动设置的优先级。 - -## 副本均衡 - -Doris 会自动进行集群内的副本均衡。目前支持两种均衡策略,负载/分区。负载均衡适合需要兼顾节点磁盘使用率和节点副本数量的场景;而分区均衡会使每个分区的副本都均匀分布在各个节点,避免热点,适合对分区读写要求比较高的场景。但是,分区均衡不考虑磁盘使用率,使用分区均衡时需要注意磁盘的使用情况。 策略只能在fe启动前配置[tablet_rebalancer_type](../config/fe_config.md#配置项列表 ) ,不支持运行时切换。 - -### 负载均衡 - -负载均衡的主要思想是,对某些分片,先在低负载的节点上创建一个副本,然后再删除这些分片在高负载节点上的副本。同时,因为不同存储介质的存在,在同一个集群内的不同 BE 节点上,可能存在一种或两种存储介质。我们要求存储介质为 A 的分片在均衡后,尽量依然存储在存储介质 A 中。所以我们根据存储介质,对集群的 BE 节点进行划分。然后针对不同的存储介质的 BE 节点集合,进行负载均衡调度。 - -同样,副本均衡会保证不会将同一个 Tablet 的副本部署在同一个 host 的 BE 上。 - -#### BE 节点负载 - -我们用 ClusterLoadStatistics(CLS)表示一个 cluster 中各个 Backend 的负载均衡情况。TabletScheduler 根据这个统计值,来触发集群均衡。我们当前通过 **磁盘使用率** 和 **副本数量** 两个指标,为每个BE计算一个 loadScore,作为 BE 的负载分数。分数越高,表示该 BE 的负载越重。 - -磁盘使用率和副本数量各有一个权重系数,分别为 **capacityCoefficient** 和 **replicaNumCoefficient**,其 **和衡为1**。其中 capacityCoefficient 会根据实际磁盘使用率动态调整。当一个 BE 的总体磁盘使用率在 50% 以下,则 capacityCoefficient 值为 0.5,如果磁盘使用率在 75%(可通过 FE 配置项 `capacity_used_percent_high_water` 配置)以上,则值为 1。如果使用率介于 50% ~ 75% 之间,则该权重系数平滑增加,公式为: - -`capacityCoefficient= 2 * 磁盘使用率 - 0.5` - -该权重系数保证当磁盘使用率过高时,该 Backend 的负载分数会更高,以保证尽快降低这个 BE 的负载。 - -TabletScheduler 会每隔 20s 更新一次 CLS。 - -### 分区均衡 - -分区均衡的主要思想是,将每个分区的在各个 Backend 上的 replica 数量差(即 partition skew),减少到最小。因此只考虑副本个数,不考虑磁盘使用率。 -为了尽量少的迁移次数,分区均衡使用二维贪心的策略,优先均衡partition skew最大的分区,均衡分区时会尽量选择,可以使整个 cluster 的在各个 Backend 上的 replica 数量差(即 cluster skew/total skew)减少的方向。 - -#### skew 统计 - -skew 统计信息由`ClusterBalanceInfo`表示,其中,`partitionInfoBySkew`以 partition skew 为key排序,便于找到max partition skew;`beByTotalReplicaCount`则是以 Backend 上的所有 replica 个数为key排序。`ClusterBalanceInfo`同样保持在CLS中, 同样 20s 更新一次。 - -max partition skew 的分区可能有多个,采用随机的方式选择一个分区计算。 - -### 均衡策略 - -TabletScheduler 在每轮调度时,都会通过 LoadBalancer 来选择一定数目的健康分片作为 balance 的候选分片。在下一次调度时,会尝试根据这些候选分片,进行均衡调度。 - -## 资源控制 - -无论是副本修复还是均衡,都是通过副本在各个 BE 之间拷贝完成的。如果同一台 BE 同一时间执行过多的任务,则会带来不小的 IO 压力。因此,Doris 在调度时控制了每个节点上能够执行的任务数目。最小的资源控制单位是磁盘(即在 be.conf 中指定的一个数据路径)。我们默认为每块磁盘配置两个 slot 用于副本修复。一个 clone 任务会占用源端和目的端各一个 slot。如果 slot 数目为零,则不会再对这块磁盘分配任务。该 slot 个数可以通过 FE 的 `schedule_slot_num_per_path` 参数配置。 - -另外,我们默认为每块磁盘提供 2 个单独的 slot 用于均衡任务。目的是防止高负载的节点因为 slot 被修复任务占用,而无法通过均衡释放空间。 - -## 副本状态查看 - -副本状态查看主要是查看副本的状态,以及副本修复和均衡任务的运行状态。这些状态大部分都**仅存在于** Master FE 节点中。因此,以下命令需直连到 Master FE 执行。 - -### 副本状态 - -1. 
全局状态检查 - - 通过 `SHOW PROC '/statistic';` 命令可以查看整个集群的副本状态。 - - ``` - +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+ - | DbId | DbName | TableNum | PartitionNum | IndexNum | TabletNum | ReplicaNum | UnhealthyTabletNum | InconsistentTabletNum | - +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+ - | 35153636 | default_cluster:DF_Newrisk | 3 | 3 | 3 | 96 | 288 | 0 | 0 | - | 48297972 | default_cluster:PaperData | 0 | 0 | 0 | 0 | 0 | 0 | 0 | - | 5909381 | default_cluster:UM_TEST | 7 | 7 | 10 | 320 | 960 | 1 | 0 | - | Total | 240 | 10 | 10 | 13 | 416 | 1248 | 1 | 0 | - +----------+-----------------------------+----------+--------------+----------+-----------+------------+--------------------+-----------------------+ - ``` - - 其中 `UnhealthyTabletNum` 列显示了对应的 Database 中,有多少 Tablet 处于非健康状态。`InconsistentTabletNum` 列显示了对应的 Database 中,有多少 Tablet 处于副本不一致的状态。最后一行 `Total` 行对整个集群进行了统计。正常情况下 `UnhealthyTabletNum` 和 `InconsistentTabletNum` 应为0。如果不为零,可以进一步查看具体有哪些 Tablet。如上图中,UM_TEST 数据库有 1 个 Tablet 状态不健康,则可以使用以下命令查看具体是哪一个 Tablet。 - - `SHOW PROC '/statistic/5909381';` - - 其中 `5909381` 为对应的 DbId。 - - ``` - +------------------+---------------------+ - | UnhealthyTablets | InconsistentTablets | - +------------------+---------------------+ - | [40467980] | [] | - +------------------+---------------------+ - ``` - - 上图会显示具体的不健康的 Tablet ID(40467980)。后面我们会介绍如何查看一个具体的 Tablet 的各个副本的状态。 - -2. 表(分区)级别状态检查 - - 用户可以通过以下命令查看指定表或分区的副本状态,并可以通过 WHERE 语句对状态进行过滤。如查看表 tbl1 中,分区 p1 和 p2 上状态为 OK 的副本: - - `ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2) WHERE STATUS = "OK";` - - ``` - +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+ - | TabletId | ReplicaId | BackendId | Version | LastFailedVersion | LastSuccessVersion | CommittedVersion | SchemaHash | VersionNum | IsBad | State | Status | - +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+ - | 29502429 | 29502432 | 10006 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502429 | 36885996 | 10002 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502429 | 48100551 | 10007 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502433 | 29502434 | 10001 | 2 | -1 | 2 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502433 | 44900737 | 10004 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | - | 29502433 | 48369135 | 10006 | 2 | -1 | -1 | 1 | -1 | 2 | false | NORMAL | OK | - +----------+-----------+-----------+---------+-------------------+--------------------+------------------+------------+------------+-------+--------+--------+ - ``` - - 这里会展示所有副本的状态。其中 `IsBad` 列为 `true` 则表示副本已经损坏。而 `Status` 列则会显示另外的其他状态。具体的状态说明,可以通过 `HELP ADMIN SHOW REPLICA STATUS;` 查看帮助。 - - `ADMIN SHOW REPLICA STATUS` 命令主要用于查看副本的健康状态。用户还可以通过以下命令查看指定表中副本的一些额外信息: - - `SHOW TABLETS FROM tbl1;` - - ``` - +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+ - | TabletId | ReplicaId | BackendId | 
SchemaHash | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | DataSize | RowCount | State | LstConsistencyCheckTime | CheckVersion | CheckVersionHash | VersionCount | PathHash | MetaUrl | CompactionStatus | - +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+ - | 29502429 | 29502432 | 10006 | 1421156361 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -5822326203532286804 | url | url | - | 29502429 | 36885996 | 10002 | 1421156361 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -1441285706148429853 | url | url | - | 29502429 | 48100551 | 10007 | 1421156361 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | 784 | 0 | NORMAL | N/A | -1 | -1 | 2 | -4784691547051455525 | url | url | - +----------+-----------+-----------+------------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+----------+----------+--------+-------------------------+--------------+----------------------+--------------+----------------------+----------------------+----------------------+ - ``` - - 上图展示了包括副本大小、行数、版本数量、所在数据路径等一些额外的信息。 - - > 注:这里显示的 `State` 列的内容不代表副本的健康状态,而是副本处于某种任务下的状态,比如 CLONE、SCHEMA\_CHANGE、ROLLUP 等。 - - 此外,用户也可以通过以下命令,查看指定表或分区的副本分布情况,来检查副本分布是否均匀。 - - `ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1;` - - ``` - +-----------+------------+-------+---------+ - | BackendId | ReplicaNum | Graph | Percent | - +-----------+------------+-------+---------+ - | 10000 | 7 | | 7.29 % | - | 10001 | 9 | | 9.38 % | - | 10002 | 7 | | 7.29 % | - | 10003 | 7 | | 7.29 % | - | 10004 | 9 | | 9.38 % | - | 10005 | 11 | > | 11.46 % | - | 10006 | 18 | > | 18.75 % | - | 10007 | 15 | > | 15.62 % | - | 10008 | 13 | > | 13.54 % | - +-----------+------------+-------+---------+ - ``` - - 这里分别展示了表 tbl1 的副本在各个 BE 节点上的个数、百分比,以及一个简单的图形化显示。 - -4. 
Tablet 级别状态检查 - - 当我们要定位到某个具体的 Tablet 时,可以使用如下命令来查看一个具体的 Tablet 的状态。如查看 ID 为 29502553 的 tablet: - - `SHOW TABLET 29502553;` - - ``` - +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+ - | DbName | TableName | PartitionName | IndexName | DbId | TableId | PartitionId | IndexId | IsSync | DetailCmd | - +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+ - | default_cluster:test | test | test | test | 29502391 | 29502428 | 29502427 | 29502428 | true | SHOW PROC '/dbs/29502391/29502428/partitions/29502427/29502428/29502553'; | - +------------------------+-----------+---------------+-----------+----------+----------+-------------+----------+--------+---------------------------------------------------------------------------+ - ``` - - 上图显示了这个 tablet 所对应的数据库、表、分区、上卷表等信息。用户可以复制 `DetailCmd` 命令中的命令继续执行: - - `SHOW PROC '/dbs/29502391/29502428/partitions/29502427/29502428/29502553';` - - ``` - +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+----------+------------------+ - | ReplicaId | BackendId | Version | VersionHash | LstSuccessVersion | LstSuccessVersionHash | LstFailedVersion | LstFailedVersionHash | LstFailedTime | SchemaHash | DataSize | RowCount | State | IsBad | VersionCount | PathHash | MetaUrl | CompactionStatus | - +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+----------+------------------+ - | 43734060 | 10004 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | -8566523878520798656 | url | url | - | 29502555 | 10002 | 2 | 0 | 2 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1885826196444191611 | url | url | - | 39279319 | 10007 | 2 | 0 | -1 | 0 | -1 | 0 | N/A | -1 | 784 | 0 | NORMAL | false | 2 | 1656508631294397870 | url | url | - +-----------+-----------+---------+-------------+-------------------+-----------------------+------------------+----------------------+---------------+------------+----------+----------+--------+-------+--------------+----------------------+----------+------------------+ - ``` - - 上图显示了对应 Tablet 的所有副本情况。这里显示的内容和 `SHOW TABLET FROM tbl1;` 的内容相同。但这里可以清楚的知道,一个具体的 Tablet 的所有副本的状态。 - -### 副本调度任务 - -1. 
查看等待被调度的任务 - - `SHOW PROC '/cluster_balance/pending_tablets';` - - ``` - +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+ - | TabletId | Type | Status | State | OrigPrio | DynmPrio | SrcBe | SrcPath | DestBe | DestPath | Timeout | Create | LstSched | LstVisit | Finished | Rate | FailedSched | FailedRunning | LstAdjPrio | VisibleVer | VisibleVerHash | CmtVer | CmtVerHash | ErrMsg | - +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+ - | 4203036 | REPAIR | REPLICA_MISSING | PENDING | HIGH | LOW | -1 | -1 | -1 | -1 | 0 | 2019-02-21 15:00:20 | 2019-02-24 11:18:41 | 2019-02-24 11:18:41 | N/A | N/A | 2 | 0 | 2019-02-21 15:00:43 | 1 | 0 | 2 | 0 | unable to find source replica | - +----------+--------+-----------------+---------+----------+----------+-------+---------+--------+----------+---------+---------------------+---------------------+---------------------+----------+------+-------------+---------------+---------------------+------------+---------------------+--------+---------------------+-------------------------------+ - ``` - - 各列的具体含义如下: - - * TabletId:等待调度的 Tablet 的 ID。一个调度任务只针对一个 Tablet - * Type:任务类型,可以是 REPAIR(修复) 或 BALANCE(均衡) - * Status:该 Tablet 当前的状态,如 REPLICA\_MISSING(副本缺失) - * State:该调度任务的状态,可能为 PENDING/RUNNING/FINISHED/CANCELLED/TIMEOUT/UNEXPECTED - * OrigPrio:初始的优先级 - * DynmPrio:当前动态调整后的优先级 - * SrcBe:源端 BE 节点的 ID - * SrcPath:源端 BE 节点的路径的 hash 值 - * DestBe:目的端 BE 节点的 ID - * DestPath:目的端 BE 节点的路径的 hash 值 - * Timeout:当任务被调度成功后,这里会显示任务的超时时间,单位秒 - * Create:任务被创建的时间 - * LstSched:上一次任务被调度的时间 - * LstVisit:上一次任务被访问的时间。这里“被访问”指包括被调度,任务执行汇报等和这个任务相关的被处理的时间点 - * Finished:任务结束时间 - * Rate:clone 任务的数据拷贝速率 - * FailedSched:任务调度失败的次数 - * FailedRunning:任务执行失败的次数 - * LstAdjPrio:上一次优先级调整的时间 - * CmtVer/CmtVerHash/VisibleVer/VisibleVerHash:用于执行 clone 任务的 version 信息 - * ErrMsg:任务被调度和运行过程中,出现的错误信息 - -2. 查看正在运行的任务 - - `SHOW PROC '/cluster_balance/running_tablets';` - - 其结果中各列的含义和 `pending_tablets` 相同。 - -3. 查看已结束任务 - - `SHOW PROC '/cluster_balance/history_tablets';` - - 我们默认只保留最近 1000 个完成的任务。其结果中各列的含义和 `pending_tablets` 相同。如果 `State` 列为 `FINISHED`,则说明任务正常完成。如果为其他,则可以根据 `ErrMsg` 列的错误信息查看具体原因。 - -## 集群负载及调度资源查看 - -1. 
集群负载 - - 通过以下命令可以查看集群当前的负载情况: - - `SHOW PROC '/cluster_balance/cluster_load_stat';` - - 首先看到的是对不同存储介质的划分: - - ``` - +---------------+ - | StorageMedium | - +---------------+ - | HDD | - | SSD | - +---------------+ - ``` - - 点击某一种存储介质,可以看到包含该存储介质的 BE 节点的均衡状态: - - `SHOW PROC '/cluster_balance/cluster_load_stat/HDD';` - - ``` - +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+ - | BeId | Cluster | Available | UsedCapacity | Capacity | UsedPercent | ReplicaNum | CapCoeff | ReplCoeff | Score | Class | - +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+ - | 10003 | default_cluster | true | 3477875259079 | 19377459077121 | 17.948 | 493477 | 0.5 | 0.5 | 0.9284678149967587 | MID | - | 10002 | default_cluster | true | 3607326225443 | 19377459077121 | 18.616 | 496928 | 0.5 | 0.5 | 0.948660871419998 | MID | - | 10005 | default_cluster | true | 3523518578241 | 19377459077121 | 18.184 | 545331 | 0.5 | 0.5 | 0.9843539990641831 | MID | - | 10001 | default_cluster | true | 3535547090016 | 19377459077121 | 18.246 | 558067 | 0.5 | 0.5 | 0.9981869446537612 | MID | - | 10006 | default_cluster | true | 3636050364835 | 19377459077121 | 18.764 | 547543 | 0.5 | 0.5 | 1.0011489897614072 | MID | - | 10004 | default_cluster | true | 3506558163744 | 15501967261697 | 22.620 | 468957 | 0.5 | 0.5 | 1.0228319835582569 | MID | - | 10007 | default_cluster | true | 4036460478905 | 19377459077121 | 20.831 | 551645 | 0.5 | 0.5 | 1.057279369420761 | MID | - | 10000 | default_cluster | true | 4369719923760 | 19377459077121 | 22.551 | 547175 | 0.5 | 0.5 | 1.0964036415787461 | MID | - +----------+-----------------+-----------+---------------+----------------+-------------+------------+----------+-----------+--------------------+-------+ - ``` - - 其中一些列的含义如下: - - * Available:为 true 表示 BE 心跳正常,且没有处于下线中 - * UsedCapacity:字节,BE 上已使用的磁盘空间大小 - * Capacity:字节,BE 上总的磁盘空间大小 - * UsedPercent:百分比,BE 上的磁盘空间使用率 - * ReplicaNum:BE 上副本数量 - * CapCoeff/ReplCoeff:磁盘空间和副本数的权重系数 - * Score:负载分数。分数越高,负载越重 - * Class:根据负载情况分类,LOW/MID/HIGH。均衡调度会将高负载节点上的副本迁往低负载节点 - - 用户可以进一步查看某个 BE 上各个路径的使用率,比如 ID 为 10001 这个 BE: - - `SHOW PROC '/cluster_balance/cluster_load_stat/HDD/10001';` - - ``` - +------------------+------------------+---------------+---------------+---------+--------+----------------------+ - | RootPath | DataUsedCapacity | AvailCapacity | TotalCapacity | UsedPct | State | PathHash | - +------------------+------------------+---------------+---------------+---------+--------+----------------------+ - | /home/disk4/palo | 498.757 GB | 3.033 TB | 3.525 TB | 13.94 % | ONLINE | 4883406271918338267 | - | /home/disk3/palo | 704.200 GB | 2.832 TB | 3.525 TB | 19.65 % | ONLINE | -5467083960906519443 | - | /home/disk1/palo | 512.833 GB | 3.007 TB | 3.525 TB | 14.69 % | ONLINE | -7733211489989964053 | - | /home/disk2/palo | 881.955 GB | 2.656 TB | 3.525 TB | 24.65 % | ONLINE | 4870995507205544622 | - | /home/disk5/palo | 694.992 GB | 2.842 TB | 3.525 TB | 19.36 % | ONLINE | 1916696897889786739 | - +------------------+------------------+---------------+---------------+---------+--------+----------------------+ - ``` - - 这里显示了指定 BE 上,各个数据路径的磁盘使用率情况。 - -2. 
调度资源 - - 用户可以通过以下命令,查看当前各个节点的 slot 使用情况: - - `SHOW PROC '/cluster_balance/working_slots';` - - ``` - +----------+----------------------+------------+------------+-------------+----------------------+ - | BeId | PathHash | AvailSlots | TotalSlots | BalanceSlot | AvgRate | - +----------+----------------------+------------+------------+-------------+----------------------+ - | 10000 | 8110346074333016794 | 2 | 2 | 2 | 2.459007474009069E7 | - | 10000 | -5617618290584731137 | 2 | 2 | 2 | 2.4730105014001578E7 | - | 10001 | 4883406271918338267 | 2 | 2 | 2 | 1.6711402709780257E7 | - | 10001 | -5467083960906519443 | 2 | 2 | 2 | 2.7540126380326536E7 | - | 10002 | 9137404661108133814 | 2 | 2 | 2 | 2.417217089806745E7 | - | 10002 | 1885826196444191611 | 2 | 2 | 2 | 1.6327378456676323E7 | - +----------+----------------------+------------+------------+-------------+----------------------+ - ``` - - 这里以数据路径为粒度,展示了当前 slot 的使用情况。其中 `AvgRate` 为历史统计的该路径上 clone 任务的拷贝速率,单位是字节/秒。 - -3. 优先修复查看 - - 以下命令,可以查看通过 `ADMIN REPAIR TABLE` 命令设置的优先修复的表或分区。 - - `SHOW PROC '/cluster_balance/priority_repair';` - - 其中 `RemainingTimeMs` 表示,这些优先修复的内容,将在这个时间后,被自动移出优先修复队列。以防止优先修复一直失败导致资源被占用。 - -### 调度器统计状态查看 - -我们收集了 TabletChecker 和 TabletScheduler 在运行过程中的一些统计信息,可以通过以下命令查看: - -`SHOW PROC '/cluster_balance/sched_stat';` - -``` -+---------------------------------------------------+-------------+ -| Item | Value | -+---------------------------------------------------+-------------+ -| num of tablet check round | 12041 | -| cost of tablet check(ms) | 7162342 | -| num of tablet checked in tablet checker | 18793506362 | -| num of unhealthy tablet checked in tablet checker | 7043900 | -| num of tablet being added to tablet scheduler | 1153 | -| num of tablet schedule round | 49538 | -| cost of tablet schedule(ms) | 49822 | -| num of tablet being scheduled | 4356200 | -| num of tablet being scheduled succeeded | 320 | -| num of tablet being scheduled failed | 4355594 | -| num of tablet being scheduled discard | 286 | -| num of tablet priority upgraded | 0 | -| num of tablet priority downgraded | 1096 | -| num of clone task | 230 | -| num of clone task succeeded | 228 | -| num of clone task failed | 2 | -| num of clone task timeout | 2 | -| num of replica missing error | 4354857 | -| num of replica version missing error | 967 | -| num of replica relocating | 0 | -| num of replica redundant error | 90 | -| num of replica missing in cluster error | 0 | -| num of balance scheduled | 0 | -+---------------------------------------------------+-------------+ -``` - -各行含义如下: - -* num of tablet check round:Tablet Checker 检查次数 -* cost of tablet check(ms):Tablet Checker 检查总耗时 -* num of tablet checked in tablet checker:Tablet Checker 检查过的 tablet 数量 -* num of unhealthy tablet checked in tablet checker:Tablet Checker 检查过的不健康的 tablet 数量 -* num of tablet being added to tablet scheduler:被提交到 Tablet Scheduler 中的 tablet 数量 -* num of tablet schedule round:Tablet Scheduler 运行次数 -* cost of tablet schedule(ms):Tablet Scheduler 运行总耗时 -* num of tablet being scheduled:被调度的 Tablet 总数量 -* num of tablet being scheduled succeeded:被成功调度的 Tablet 总数量 -* num of tablet being scheduled failed:调度失败的 Tablet 总数量 -* num of tablet being scheduled discard:调度失败且被抛弃的 Tablet 总数量 -* num of tablet priority upgraded:优先级上调次数 -* num of tablet priority downgraded:优先级下调次数 -* num of clone task:生成的 clone 任务数量 -* num of clone task succeeded:clone 任务成功的数量 -* num of clone task failed:clone 任务失败的数量 -* num of clone task timeout:clone 任务超时的数量 -* num of replica missing error:检查的状态为副本缺失的 tablet 的数量 
-* num of replica version missing error:检查的状态为版本缺失的 tablet 的数量(该统计值包括了 num of replica relocating 和 num of replica missing in cluster error) -* num of replica relocating:检查的状态为 replica relocating 的 tablet 的数量 -* num of replica redundant error:检查的状态为副本冗余的 tablet 的数量 -* num of replica missing in cluster error:检查的状态为不在对应 cluster 的 tablet 的数量 -* num of balance scheduled:均衡调度的次数 - -> 注:以上状态都只是历史累加值。我们也在 FE 的日志中,定期打印了这些统计信息,其中括号内的数值表示自上次统计信息打印依赖,各个统计值的变化数量。 - -## 相关配置说明 - -### 可调整参数 - -以下可调整参数均为 fe.conf 中可配置参数。 - -* use\_new\_tablet\_scheduler - - * 说明:是否启用新的副本调度方式。新的副本调度方式即本文档介绍的副本调度方式。 - * 默认值:true - * 重要性:高 - -* tablet\_repair\_delay\_factor\_second - - * 说明:对于不同的调度优先级,我们会延迟不同的时间后开始修复。以防止因为例行重启、升级等过程中,产生大量不必要的副本修复任务。此参数为一个基准系数。对于 HIGH 优先级,延迟为 基准系数 * 1;对于 NORMAL 优先级,延迟为 基准系数 * 2;对于 LOW 优先级,延迟为 基准系数 * 3。即优先级越低,延迟等待时间越长。如果用户想尽快修复副本,可以适当降低该参数。 - * 默认值:60秒 - * 重要性:高 - -* schedule\_slot\_num\_per\_path - - * 说明:默认分配给每块磁盘用于副本修复的 slot 数目。该数目表示一块磁盘能同时运行的副本修复任务数。如果想以更快的速度修复副本,可以适当调高这个参数。单数值越高,可能对 IO 影响越大。 - * 默认值:2 - * 重要性:高 - -* balance\_load\_score\_threshold - - * 说明:集群均衡的阈值。默认为 0.1,即 10%。当一个 BE 节点的 load score,不高于或不低于平均 load score 的 10% 时,我们认为这个节点是均衡的。如果想让集群负载更加平均,可以适当调低这个参数。 - * 默认值:0.1 - * 重要性:中 - -* storage\_high\_watermark\_usage\_percent 和 storage\_min\_left\_capacity\_bytes - - * 说明:这两个参数,分别表示一个磁盘的最大空间使用率上限,以及最小的空间剩余下限。当一块磁盘的空间使用率大于上限,或者剩余空间小于下限时,该磁盘将不再作为均衡调度的目的地址。 - * 默认值:0.85 和 1048576000 (1GB) - * 重要性:中 - -* disable\_balance - - * 说明:控制是否关闭均衡功能。当副本处于均衡过程中时,有些功能,如 ALTER TABLE 等将会被禁止。而均衡可能持续很长时间。因此,如果用户希望尽快进行被禁止的操作。可以将该参数设为 true,以关闭均衡调度。 - * 默认值:false - * 重要性:中 - -### 不可调整参数 - -以下参数暂不支持修改,仅作说明。 - -* TabletChecker 调度间隔 - - TabletChecker 每20秒进行一次检查调度。 - -* TabletScheduler 调度间隔 - - TabletScheduler 每5秒进行一次调度 - -* TabletScheduler 每批次调度个数 - - TabletScheduler 每次调度最多 50 个 tablet。 - -* TabletScheduler 最大等待调度和运行中任务数 - - 最大等待调度任务数和运行中任务数为 2000。当超过 2000 后,TabletChecker 将不再产生新的调度任务给 TabletScheduler。 - -* TabletScheduler 最大均衡任务数 - - 最大均衡任务数为 500。当超过 500 后,将不再产生新的均衡任务。 - -* 每块磁盘用于均衡任务的 slot 数目 - - 每块磁盘用于均衡任务的 slot 数目为2。这个 slot 独立于用于副本修复的 slot。 - -* 集群均衡情况更新间隔 - - TabletScheduler 每隔 20 秒会重新计算一次集群的 load score。 - -* Clone 任务的最小和最大超时时间 - - 一个 clone 任务超时时间范围是 3min ~ 2hour。具体超时时间通过 tablet 的大小计算。计算公式为 (tablet size) / (5MB/s)。当一个 clone 任务运行失败 3 次后,该任务将终止。 - -* 动态优先级调整策略 - - 优先级最小调整间隔为 5min。当一个 tablet 调度失败5次后,会调低优先级。当一个 tablet 30min 未被调度时,会调高优先级。 - -## 相关问题 - -* 在某些情况下,默认的副本修复和均衡策略可能会导致网络被打满(多发生在千兆网卡,且每台 BE 的磁盘数量较多的情况下)。此时需要调整一些参数来减少同时进行的均衡和修复任务数。 - -* 目前针对 Colocate Table 的副本的均衡策略无法保证同一个 Tablet 的副本不会分布在同一个 host 的 BE 上。但 Colocate Table 的副本的修复策略会检测到这种分布错误并校正。但可能会出现,校正后,均衡策略再次认为副本不均衡而重新均衡。从而导致在两种状态间不停交替,无法使 Colocate Group 达成稳定。针对这种情况,我们建议在使用 Colocate 属性时,尽量保证集群是同构的,以减小副本分布在同一个 host 上的概率。 - -## 最佳实践 - -### 控制并管理集群的副本修复和均衡进度 - -在大多数情况下,通过默认的参数配置,Doris 都可以自动的进行副本修复和集群均衡。但是某些情况下,我们需要通过人工介入调整参数,来达到一些特殊的目的。如优先修复某个表或分区、禁止集群均衡以降低集群负载、优先修复非 colocation 的表数据等等。 - -本小节主要介绍如何通过修改参数,来控制并管理集群的副本修复和均衡进度。 - -1. 删除损坏副本 - - 某些情况下,Doris 可能无法自动检测某些损坏的副本,从而导致查询或导入在损坏的副本上频繁报错。此时我们需要手动删除已损坏的副本。该方法可以适用于:删除版本数过高导致 -235 错误的副本、删除文件已损坏的副本等等。 - - 首先,找到副本对应的 tablet id,假设为 10001。通过 `show tablet 10001;` 并执行其中的 `show proc` 语句可以查看对应的 tablet 的各个副本详情。 - - 假设需要删除的副本的 backend id 是 20001。则执行以下语句将副本标记为 `bad`: - - ``` - ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10001", "backend_id" = "20001", "status" = "bad"); - ``` - - 此时,再次通过 `show proc` 语句可以看到对应的副本的 `IsBad` 列值为 `true`。 - - 被标记为 `bad` 的副本不会再参与导入和查询。同时副本修复逻辑会自动补充一个新的副本。 - -2. 优先修复某个表或分区 - - `help admin repair table;` 查看帮助。该命令会尝试优先修复指定表或分区的tablet。 - -3. 
停止均衡任务 - - 均衡任务会占用一定的网络带宽和IO资源。如果希望停止新的均衡任务的产生,可以通过以下命令: - - ``` - ADMIN SET FRONTEND CONFIG ("disable_balance" = "true"); - ``` - -4. 停止所有副本调度任务 - - 副本调度任务包括均衡和修复任务。这些任务都会占用一定的网络带宽和IO资源。可以通过以下命令停止所有副本调度任务(不包括已经在运行的,包括 colocation 表和普通表): - - ``` - ADMIN SET FRONTEND CONFIG ("disable_tablet_scheduler" = "true"); - ``` - -5. 停止所有 colocation 表的副本调度任务。 - - colocation 表的副本调度和普通表是分开独立运行的。某些情况下,用户可能希望先停止对 colocation 表的均衡和修复工作,而将集群资源用于普通表的修复,则可以通过以下命令: - - ``` - ADMIN SET FRONTEND CONFIG ("disable_colocate_balance" = "true"); - ``` - -6. 使用更保守的策略修复副本 - - Doris 在检测到副本缺失、BE宕机等情况下,会自动修复副本。但为了减少一些抖动导致的错误(如BE短暂宕机),Doris 会延迟触发这些任务。 - - * `tablet_repair_delay_factor_second` 参数。默认 60 秒。根据修复任务优先级的不同,会推迟 60秒、120秒、180秒后开始触发修复任务。可以通过以下命令延长这个时间,这样可以容忍更长的异常时间,以避免触发不必要的修复任务: - - ``` - ADMIN SET FRONTEND CONFIG ("tablet_repair_delay_factor_second" = "120"); - ``` - -7. 使用更保守的策略触发 colocation group 的重分布 - - colocation group 的重分布可能伴随着大量的 tablet 迁移。`colocate_group_relocate_delay_second` 用于控制重分布的触发延迟。默认 1800秒。如果某台 BE 节点可能长时间下线,可以尝试调大这个参数,以避免不必要的重分布: - - ``` - ADMIN SET FRONTEND CONFIG ("colocate_group_relocate_delay_second" = "3600"); - ``` - -8. 更快速的副本均衡 - - Doris 的副本均衡逻辑会先增加一个正常副本,然后在删除老的副本,已达到副本迁移的目的。而在删除老副本时,Doris会等待这个副本上已经开始执行的导入任务完成,以避免均衡任务影响导入任务。但这样会降低均衡逻辑的执行速度。此时可以通过修改以下参数,让 Doris 忽略这个等待,直接删除老副本: - - ``` - ADMIN SET FRONTEND CONFIG ("enable_force_drop_redundant_replica" = "true"); - ``` - - 这种操作可能会导致均衡期间部分导入任务失败(需要重试),但会显著加速均衡速度。 - -总体来讲,当我们需要将集群快速恢复到正常状态时,可以考虑按照以下思路处理: - -1. 找到导致高优任务报错的tablet,将有问题的副本置为 bad。 -2. 通过 `admin repair` 语句高优修复某些表。 -3. 停止副本均衡逻辑以避免占用集群资源,等集群恢复后,再开启即可。 -4. 使用更保守的策略触发修复任务,以应对 BE 频繁宕机导致的雪崩效应。 -5. 按需关闭 colocation 表的调度任务,集中集群资源修复其他高优数据。 - - - diff --git a/docs/zh-CN/administrator-guide/orthogonal-bitmap-manual.md b/docs/zh-CN/administrator-guide/orthogonal-bitmap-manual.md deleted file mode 100644 index 238ac04a97..0000000000 --- a/docs/zh-CN/administrator-guide/orthogonal-bitmap-manual.md +++ /dev/null @@ -1,161 +0,0 @@ ---- -{ - "title": "正交的BITMAP计算", - "language": "zh-CN" -} ---- - - - -# 正交的BITMAP计算 - -## 背景 - -Doris原有的Bitmap聚合函数设计比较通用,但对亿级别以上bitmap大基数的交并集计算性能较差。排查后端be的bitmap聚合函数逻辑,发现主要有两个原因。一是当bitmap基数较大时,如bitmap大小超过1g,网络/磁盘IO处理时间比较长;二是后端be实例在scan数据后全部传输到顶层节点进行求交和并运算,给顶层单节点带来压力,成为处理瓶颈。 - -解决思路是将bitmap列的值按照range划分,不同range的值存储在不同的分桶中,保证了不同分桶的bitmap值是正交的。当查询时,先分别对不同分桶中的正交bitmap进行聚合计算,然后顶层节点直接将聚合计算后的值合并汇总,并输出。如此会大大提高计算效率,解决了顶层单节点计算瓶颈问题。 - -## 使用指南 - -1. 建表,增加hid列,表示bitmap列值id范围, 作为hash分桶列 -2. 使用场景 - -### Create table - -建表时需要使用聚合模型,数据类型是 bitmap , 聚合函数是 bitmap_union - -``` -CREATE TABLE `user_tag_bitmap` ( - `tag` bigint(20) NULL COMMENT "用户标签", - `hid` smallint(6) NULL COMMENT "分桶id", - `user_id` bitmap BITMAP_UNION NULL COMMENT "" -) ENGINE=OLAP -AGGREGATE KEY(`tag`, `hid`) -COMMENT "OLAP" -DISTRIBUTED BY HASH(`hid`) BUCKETS 3 -``` -表schema增加hid列,表示id范围, 作为hash分桶列。 - -注:hid数和BUCKETS要设置合理,hid数设置至少是BUCKETS的5倍以上,以使数据hash分桶尽量均衡 - -### Data Load - -``` -LOAD LABEL user_tag_bitmap_test -( -DATA INFILE('hdfs://abc') -INTO TABLE user_tag_bitmap -COLUMNS TERMINATED BY ',' -(tmp_tag, tmp_user_id) -SET ( -tag = tmp_tag, -hid = ceil(tmp_user_id/5000000), -user_id = to_bitmap(tmp_user_id) -) -) -注意:5000000这个数不固定,可按需调整 -... -``` -数据格式: -``` -11111111,1 -11111112,2 -11111113,3 -11111114,4 -... 
-``` -注:第一列代表用户标签,由中文转换成数字 - -load数据时,对用户bitmap值range范围纵向切割,例如,用户id在1-5000000范围内的hid值相同,hid值相同的行会分配到一个分桶内,如此每个分桶内到的bitmap都是正交的。可以利用桶内bitmap值正交特性,进行交并集计算,计算结果会被shuffle至top节点聚合。 - - - -#### bitmap_orthogonal_intersect - -求bitmap交集函数 - -语法: - - orthogonal_bitmap_intersect(bitmap_column, column_to_filter, filter_values) - -参数: - - 第一个参数是Bitmap列,第二个参数是用来过滤的维度列,第三个参数是变长参数,含义是过滤维度列的不同取值 - -说明: - - 查询规划上聚合分2层,在第一层be节点(update、serialize)先按filter_values为key进行hash聚合,然后对所有key的bitmap求交集,结果序列化后发送至第二层be节点(merge、finalize),在第二层be节点对所有来源于第一层节点的bitmap值循环求并集 - -样例: -``` -select BITMAP_COUNT(orthogonal_bitmap_intersect(user_id, tag, 13080800, 11110200)) from user_tag_bitmap where tag in (13080800, 11110200); - -``` - -#### orthogonal_bitmap_intersect_count - -求bitmap交集count函数,语法同原版intersect_count,但实现不同 - -语法: - - orthogonal_bitmap_intersect_count(bitmap_column, column_to_filter, filter_values) - -参数: - - 第一个参数是Bitmap列,第二个参数是用来过滤的维度列,第三个参数开始是变长参数,含义是过滤维度列的不同取值 - -说明: - - 查询规划聚合上分2层,在第一层be节点(update、serialize)先按filter_values为key进行hash聚合,然后对所有key的bitmap求交集,再对交集结果求count,count值序列化后发送至第二层be节点(merge、finalize),在第二层be节点对所有来源于第一层节点的count值循环求sum - -#### orthogonal_bitmap_union_count - -求bitmap并集count函数,语法同原版bitmap_union_count,但实现不同。 - -语法: - - orthogonal_bitmap_union_count(bitmap_column) - -参数: - - 参数类型是bitmap,是待求并集count的列 - -说明: - - 查询规划上分2层,在第一层be节点(update、serialize)对所有bitmap求并集,再对并集的结果bitmap求count,count值序列化后发送至第二层be节点(merge、finalize),在第二层be节点对所有来源于第一层节点的count值循环求sum - -### 使用场景 - -符合对bitmap进行正交计算的场景,如在用户行为分析中,计算留存,漏斗,用户画像等。 - - -人群圈选: - -``` - select orthogonal_bitmap_intersect_count(user_id, tag, 13080800, 11110200) from user_tag_bitmap where tag in (13080800, 11110200); - 注:13080800、11110200代表用户标签 -``` - -计算user_id的去重值: - -``` -select orthogonal_bitmap_union_count(user_id) from user_tag_bitmap where tag in (13080800, 11110200); - -``` diff --git a/docs/zh-CN/administrator-guide/outfile.md b/docs/zh-CN/administrator-guide/outfile.md deleted file mode 100644 index 3a3b024997..0000000000 --- a/docs/zh-CN/administrator-guide/outfile.md +++ /dev/null @@ -1,192 +0,0 @@ ---- -{ - "title": "导出查询结果集", - "language": "zh-CN" -} ---- - - - -# 导出查询结果集 - -本文档介绍如何使用 `SELECT INTO OUTFILE` 命令进行查询结果的导出操作。 - -## 语法 - -`SELECT INTO OUTFILE` 语句可以将查询结果导出到文件中。目前支持通过 Broker 进程, 通过 S3 协议, 或直接通过 HDFS 协议,导出到远端存储,如 HDFS,S3,BOS,COS(腾讯云)上。语法如下 - -``` -query_stmt -INTO OUTFILE "file_path" -[format_as] -[properties] -``` - -* `file_path` - - `file_path` 指向文件存储的路径以及文件前缀。如 `hdfs://path/to/my_file_`。 - - 最终的文件名将由 `my_file_`,文件序号以及文件格式后缀组成。其中文件序号由0开始,数量为文件被分割的数量。如: - - ``` - my_file_abcdefg_0.csv - my_file_abcdefg_1.csv - my_file_abcdegf_2.csv - ``` - -* `[format_as]` - - ``` - FORMAT AS CSV - ``` - - 指定导出格式。支持csv、parquet、csv_with_names、csv_with_names_and_types. 默认为 CSV。 - - -* `[properties]` - - 指定相关属性。目前支持通过 Broker 进程, 或通过 S3 协议进行导出。 - - + Broker 相关属性需加前缀 `broker.`。具体参阅[Broker 文档](./broker.html)。 - + HDFS 相关属性需加前缀 `hdfs.` 其中 hdfs.fs.defaultFS 用于填写 namenode 地址和端口。属于必填项。。 - + S3 协议则直接执行 S3 协议配置即可。 - - ``` - ("broker.prop_key" = "broker.prop_val", ...) - or - ("hdfs.fs.defaultFS" = "xxx", "hdfs.hdfs_user" = "xxx") - or - ("AWS_ENDPOINT" = "xxx", ...) - ``` - - 其他属性: - - ``` - ("key1" = "val1", "key2" = "val2", ...) 
- ``` - - 目前支持以下属性: - - * `column_separator`:列分隔符,仅对 CSV 格式适用。默认为 `\t`。 - * `line_delimiter`:行分隔符,仅对 CSV 格式适用。默认为 `\n`。 - * `max_file_size`:单个文件的最大大小。默认为 1GB。取值范围在 5MB 到 2GB 之间。超过这个大小的文件将会被切分。 - * `schema`:PARQUET 文件schema信息。仅对 PARQUET 格式适用。导出文件格式为PARQUET时,必须指定`schema`。 - -## 并发导出 - -默认情况下,查询结果集的导出是非并发的,也就是单点导出。如果用户希望查询结果集可以并发导出,需要满足以下条件: - -1. session variable 'enable_parallel_outfile' 开启并发导出: ```set enable_parallel_outfile = true;``` -2. 导出方式为 S3 , 或者 HDFS, 而不是使用 broker -3. 查询可以满足并发导出的需求,比如顶层不包含 sort 等单点节点。(后面会举例说明,哪种属于不可并发导出结果集的查询) - -满足以上三个条件,就能触发并发导出查询结果集了。并发度 = ```be_instacne_num * parallel_fragment_exec_instance_num``` - -### 如何验证结果集被并发导出 - -用户通过 session 变量设置开启并发导出后,如果想验证当前查询是否能进行并发导出,则可以通过下面这个方法。 - -``` -explain select xxx from xxx where xxx into outfile "s3://xxx" format as csv properties ("AWS_ENDPOINT" = "xxx", ...); -``` - -对查询进行 explain 后,Doris 会返回该查询的规划,如果你发现 ```RESULT FILE SINK``` 出现在 ```PLAN FRAGMENT 1``` 中,就说明导出并发开启成功了。 -如果 ```RESULT FILE SINK``` 出现在 ```PLAN FRAGMENT 0``` 中,则说明当前查询不能进行并发导出 (当前查询不同时满足并发导出的三个条件)。 - -``` -并发导出的规划示例: -+-----------------------------------------------------------------------------+ -| Explain String | -+-----------------------------------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS: | | | | -| PARTITION: UNPARTITIONED | -| | -| RESULT SINK | -| | -| 1:EXCHANGE | -| | -| PLAN FRAGMENT 1 | -| OUTPUT EXPRS:`k1` + `k2` | -| PARTITION: HASH_PARTITIONED: `default_cluster:test`.`multi_tablet`.`k1` | -| | -| RESULT FILE SINK | -| FILE PATH: s3://ml-bd-repo/bpit_test/outfile_1951_ | -| STORAGE TYPE: S3 | -| | -| 0:OlapScanNode | -| TABLE: multi_tablet | -+-----------------------------------------------------------------------------+ -``` - -## 使用示例 - -具体参阅[OUTFILE 文档](../sql-reference/sql-statements/Data%20Manipulation/OUTFILE.md)。 - -## 返回结果 - -导出命令为同步命令。命令返回,即表示操作结束。同时会返回一行结果来展示导出的执行结果。 - -如果正常导出并返回,则结果如下: - -``` -mysql> select * from tbl1 limit 10 into outfile "file:///home/work/path/result_"; -+------------+-----------+----------+--------------------------------------------------------------------+ -| FileNumber | TotalRows | FileSize | URL | -+------------+-----------+----------+--------------------------------------------------------------------+ -| 1 | 2 | 8 | file:///192.168.1.10/home/work/path/result_{fragment_instance_id}_ | -+------------+-----------+----------+--------------------------------------------------------------------+ -1 row in set (0.05 sec) -``` - -* FileNumber:最终生成的文件个数。 -* TotalRows:结果集行数。 -* FileSize:导出文件总大小。单位字节。 -* URL:如果是导出到本地磁盘,则这里显示具体导出到哪个 Compute Node。 - -如果进行了并发导出,则会返回多行数据。 - -``` -+------------+-----------+----------+--------------------------------------------------------------------+ -| FileNumber | TotalRows | FileSize | URL | -+------------+-----------+----------+--------------------------------------------------------------------+ -| 1 | 3 | 7 | file:///192.168.1.10/home/work/path/result_{fragment_instance_id}_ | -| 1 | 2 | 4 | file:///192.168.1.11/home/work/path/result_{fragment_instance_id}_ | -+------------+-----------+----------+--------------------------------------------------------------------+ -2 rows in set (2.218 sec) -``` - -如果执行错误,则会返回错误信息,如: - -``` -mysql> SELECT * FROM tbl INTO OUTFILE ... -ERROR 1064 (HY000): errCode = 2, detailMessage = Open broker writer failed ... 
-``` - -## 注意事项 - -* 如果不开启并发导出,查询结果是由单个 BE 节点,单线程导出的。因此导出时间和导出结果集大小正相关。开启并发导出可以降低导出的时间。 -* 导出命令不会检查文件及文件路径是否存在。是否会自动创建路径、或是否会覆盖已存在文件,完全由远端存储系统的语义决定。 -* 如果在导出过程中出现错误,可能会有导出文件残留在远端存储系统上。Doris 不会清理这些文件。需要用户手动清理。 -* 导出命令的超时时间同查询的超时时间。可以通过 `SET query_timeout=xxx` 进行设置。 -* 对于结果集为空的查询,依然会产生一个大小为0的文件。 -* 文件切分会保证一行数据完整的存储在单一文件中。因此文件的大小并不严格等于 `max_file_size`。 -* 对于部分输出为非可见字符的函数,如 BITMAP、HLL 类型,输出为 `\N`,即 NULL。 -* 目前部分地理信息函数,如 `ST_Point` 的输出类型为 VARCHAR,但实际输出值为经过编码的二进制字符。当前这些函数会输出乱码。对于地理函数,请使用 `ST_AsText` 进行输出。 diff --git a/docs/zh-CN/administrator-guide/partition_cache.md b/docs/zh-CN/administrator-guide/partition_cache.md deleted file mode 100644 index a14ac5a837..0000000000 --- a/docs/zh-CN/administrator-guide/partition_cache.md +++ /dev/null @@ -1,197 +0,0 @@ -# 分区缓存 - -## 需求场景 -大部分数据分析场景是写少读多,数据写入一次,多次频繁读取,比如一张报表涉及的维度和指标,数据在凌晨一次性计算好,但每天有数百甚至数千次的页面访问,因此非常适合把结果集缓存起来。在数据分析或BI应用中,存在下面的业务场景: -* **高并发场景**,Doris可以较好的支持高并发,但单台服务器无法承载太高的QPS -* **复杂图表的看板**,复杂的Dashboard或者大屏类应用,数据来自多张表,每个页面有数十个查询,虽然每个查询只有数十毫秒,但是总体查询时间会在数秒 -* **趋势分析**,给定日期范围的查询,指标按日显示,比如查询最近7天内的用户数的趋势,这类查询数据量大,查询范围广,查询时间往往需要数十秒 -* **用户重复查询**,如果产品没有防重刷机制,用户因手误或其他原因重复刷新页面,导致提交大量的重复的SQL - -以上四种场景,在应用层的解决方案,把查询结果放到Redis中,周期性的更新缓存或者用户手工刷新缓存,但是这个方案有如下问题: -* **数据不一致**,无法感知数据的更新,导致用户经常看到旧的数据 -* **命中率低**,缓存整个查询结果,如果数据实时写入,缓存频繁失效,命中率低且系统负载较重 -* **额外成本**,引入外部缓存组件,会带来系统复杂度,增加额外成本 - -## 解决方案 -本分区缓存策略可以解决上面的问题,优先保证数据一致性,在此基础上细化缓存粒度,提升命中率,因此有如下特点: -* 用户无需担心数据一致性,通过版本来控制缓存失效,缓存的数据和从BE中查询的数据是一致的 -* 没有额外的组件和成本,缓存结果存储在BE的内存中,用户可以根据需要调整缓存内存大小 -* 实现了两种缓存策略,SQLCache和PartitionCache,后者缓存粒度更细 -* 用一致性哈希解决BE节点上下线的问题,BE中的缓存算法是改进的LRU - -## SQLCache -SQLCache按SQL的签名、查询的表的分区ID、分区最新版本来存储和获取缓存。三者组合确定一个缓存数据集,任何一个变化了,如SQL有变化,如查询字段或条件不一样,或数据更新后版本变化了,会导致命中不了缓存。 - -如果多张表Join,使用最近更新的分区ID和最新的版本号,如果其中一张表更新了,会导致分区ID或版本号不一样,也一样命中不了缓存。 - -SQLCache,更适合T+1更新的场景,凌晨数据更新,首次查询从BE中获取结果放入到缓存中,后续相同查询从缓存中获取。实时更新数据也可以使用,但是可能存在命中率低的问题,可以参考如下PartitionCache。 - -## PartitionCache - -### 设计原理 -1. SQL可以并行拆分,Q = Q1 ∪ Q2 ... ∪ Qn,R= R1 ∪ R2 ... ∪ Rn,Q为查询语句,R为结果集 -2. 拆分为只读分区和可更新分区,只读分区缓存,更新分区不缓存 - -如上,查询最近7天的每天用户数,如按日期分区,数据只写当天分区,当天之外的其他分区的数据,都是固定不变的,在相同的查询SQL下,查询某个不更新分区的指标都是固定的。如下,在2020-03-09当天查询前7天的用户数,2020-03-03至2020-03-07的数据来自缓存,2020-03-08第一次查询来自分区,后续的查询来自缓存,2020-03-09因为当天在不停写入,所以来自分区。 - -因此,查询N天的数据,数据更新最近的D天,每天只是日期范围不一样相似的查询,只需要查询D个分区即可,其他部分都来自缓存,可以有效降低集群负载,减少查询时间。 - -``` -MySQL [(none)]> SELECT eventdate,count(userid) FROM testdb.appevent WHERE eventdate>="2020-03-03" AND eventdate<="2020-03-09" GROUP BY eventdate ORDER BY eventdate; -+------------+-----------------+ -| eventdate | count(`userid`) | -+------------+-----------------+ -| 2020-03-03 | 15 | -| 2020-03-04 | 20 | -| 2020-03-05 | 25 | -| 2020-03-06 | 30 | -| 2020-03-07 | 35 | -| 2020-03-08 | 40 | //第一次来自分区,后续来自缓存 -| 2020-03-09 | 25 | //来自分区 -+------------+-----------------+ -7 rows in set (0.02 sec) -``` - -在PartitionCache中,缓存第一级Key是去掉了分区条件后的SQL的128位MD5签名,下面是改写后的待签名的SQL: -``` -SELECT eventdate,count(userid) FROM testdb.appevent GROUP BY eventdate ORDER BY eventdate; -``` -缓存的第二级Key是查询结果集的分区字段的内容,比如上面查询结果的eventdate列的内容,二级Key的附属信息是分区的版本号和版本更新时间。 - -下面演示上面SQL在2020-03-09当天第一次执行的流程: -1. 从缓存中获取数据 -``` -+------------+-----------------+ -| 2020-03-03 | 15 | -| 2020-03-04 | 20 | -| 2020-03-05 | 25 | -| 2020-03-06 | 30 | -| 2020-03-07 | 35 | -+------------+-----------------+ -``` -2. 
从BE中获取数据的SQL和数据 -``` -SELECT eventdate,count(userid) FROM testdb.appevent WHERE eventdate>="2020-03-08" AND eventdate<="2020-03-09" GROUP BY eventdate ORDER BY eventdate; - -+------------+-----------------+ -| 2020-03-08 | 40 | -+------------+-----------------+ -| 2020-03-09 | 25 | -+------------+-----------------+ -``` -3. 最后发送给终端的数据 -``` -+------------+-----------------+ -| eventdate | count(`userid`) | -+------------+-----------------+ -| 2020-03-03 | 15 | -| 2020-03-04 | 20 | -| 2020-03-05 | 25 | -| 2020-03-06 | 30 | -| 2020-03-07 | 35 | -| 2020-03-08 | 40 | -| 2020-03-09 | 25 | -+------------+-----------------+ -``` -4. 发送给缓存的数据 -``` -+------------+-----------------+ -| 2020-03-08 | 40 | -+------------+-----------------+ -``` - -Partition缓存,适合按日期分区,部分分区实时更新,查询SQL较为固定。 - -分区字段也可以是其他字段,但是需要保证只有少量分区更新。 - -### 一些限制 -* 只支持OlapTable,其他存储如MySQL的表没有版本信息,无法感知数据是否更新 -* 只支持按分区字段分组,不支持按其他字段分组,按其他字段分组,该分组数据都有可能被更新,会导致缓存都失效 -* 只支持结果集的前半部分、后半部分以及全部命中缓存,不支持结果集被缓存数据分割成几个部分 - -## 使用方式 -### 开启SQLCache -确保fe.conf的cache_enable_sql_mode=true(默认是true) -``` -vim fe/conf/fe.conf -cache_enable_sql_mode=true -``` -在MySQL命令行中设置变量 -``` -MySQL [(none)]> set [global] enable_sql_cache=true; -``` -注:global是全局变量,不加指当前会话变量 - -### 开启PartitionCache -确保fe.conf的cache_enable_partition_mode=true(默认是true) -``` -vim fe/conf/fe.conf -cache_enable_partition_mode=true -``` -在MySQL命令行中设置变量 -``` -MySQL [(none)]> set [global] enable_partition_cache=true; -``` - -如果同时开启了两个缓存策略,下面的参数,需要注意一下: -``` -cache_last_version_interval_second=900 -``` -如果分区的最新版本的时间离现在的间隔,大于cache_last_version_interval_second,则会优先把整个查询结果缓存。如果小于这个间隔,如果符合PartitionCache的条件,则按PartitionCache数据。 - -### 监控 -FE的监控项: -``` -query_table //Query中有表的数量 -query_olap_table //Query中有Olap表的数量 -cache_mode_sql //识别缓存模式为sql的Query数量 -cache_hit_sql //模式为sql的Query命中Cache的数量 -query_mode_partition //识别缓存模式为Partition的Query数量 -cache_hit_partition //通过Partition命中的Query数量 -partition_all //Query中扫描的所有分区 -partition_hit //通过Cache命中的分区数量 - -Cache命中率 = (cache_hit_sql + cache_hit_partition) / query_olap_table -Partition命中率 = partition_hit / partition_all -``` - -BE的监控项: -``` -query_cache_memory_total_byte //Cache内存大小 -query_query_cache_sql_total_count //Cache的SQL的数量 -query_cache_partition_total_count //Cache分区数量 - -SQL平均数据大小 = cache_memory_total / cache_sql_total -Partition平均数据大小 = cache_memory_total / cache_partition_total -``` - -其他监控: -可以从Grafana中查看BE节点的CPU和内存指标,Query统计中的Query Percentile等指标,配合Cache参数的调整来达成业务目标。 - - -### 优化参数 -FE的配置项cache_result_max_row_count,查询结果集放入缓存的最大行数,可以根据实际情况调整,但建议不要设置过大,避免过多占用内存,超过这个大小的结果集不会被缓存。 -``` -vim fe/conf/fe.conf -cache_result_max_row_count=3000 -``` - -BE最大分区数量cache_max_partition_count,指每个SQL对应的最大分区数,如果是按日期分区,能缓存2年多的数据,假如想保留更长时间的缓存,请把这个参数设置得更大,同时修改cache_result_max_row_count的参数。 -``` -vim be/conf/be.conf -cache_max_partition_count=1024 -``` - -BE中缓存内存设置,有两个参数query_cache_max_size和query_cache_elasticity_size两部分组成(单位MB),内存超过query_cache_max_size + cache_elasticity_size会开始清理,并把内存控制到query_cache_max_size以下。可以根据BE节点数量,节点内存大小,和缓存命中率来设置这两个参数。 -``` -query_cache_max_size_mb=256 -query_cache_elasticity_size_mb=128 -``` -计算方法: - -假如缓存10K个Query,每个Query缓存1000行,每行是128个字节,分布在10台BE上,则每个BE需要128M内存(10K*1000*128/10)。 - -## 未尽事项 -* T+1的数据,是否也可以用Partition缓存? 目前不支持 -* 类似的SQL,之前查询了2个指标,现在查询3个指标,是否可以利用2个指标的缓存? 目前不支持 -* 按日期分区,但是需要按周维度汇总数据,是否可用PartitionCache? 
目前不支持 diff --git a/docs/zh-CN/administrator-guide/privilege.md b/docs/zh-CN/administrator-guide/privilege.md deleted file mode 100644 index 1e59b2999c..0000000000 --- a/docs/zh-CN/administrator-guide/privilege.md +++ /dev/null @@ -1,234 +0,0 @@ ---- -{ - "title": "权限管理", - "language": "zh-CN" -} ---- - - - -# 权限管理 - -Doris 新的权限管理系统参照了 Mysql 的权限管理机制,做到了表级别细粒度的权限控制,基于角色的权限访问控制,并且支持白名单机制。 - -## 名词解释 - -1. 用户标识 user_identity - - 在权限系统中,一个用户被识别为一个 User Identity(用户标识)。用户标识由两部分组成:username 和 userhost。其中 username 为用户名,由英文大小写组成。userhost 表示该用户链接来自的 IP。user_identity 以 username@'userhost' 的方式呈现,表示来自 userhost 的 username。 - - user_identity 的另一种表现方式为 username@['domain'],其中 domain 为域名,可以通过 DNS 或 BNS(百度名字服务)解析为一组 ip。最终表现为一组 username@'userhost',所以后面我们统一使用 username@'userhost' 来表示。 - -2. 权限 Privilege - - 权限作用的对象是节点、数据库或表。不同的权限代表不同的操作许可。 - -3. 角色 Role - - Doris可以创建自定义命名的角色。角色可以被看做是一组权限的集合。新创建的用户可以被赋予某一角色,则自动被赋予该角色所拥有的权限。后续对角色的权限变更,也会体现在所有属于该角色的用户权限上。 - -4. 用户属性 user_property - - 用户属性直接附属于某一用户,而不是用户标识。即 cmy@'192.%' 和 cmy@['domain'] 都拥有同一组用户属性,该属性属于用户 cmy,而不是 cmy@'192.%' 或 cmy@['domain']。 - - 用户属性包括但不限于: 用户最大连接数、导入集群配置等等。 - -## 支持的操作 - -1. 创建用户:CREATE USER -2. 删除用户:DROP USER -3. 授权:GRANT -4. 撤权:REVOKE -5. 创建角色:CREATE ROLE -6. 删除角色:DROP ROLE -7. 查看当前用户权限:SHOW GRANTS -8. 查看所有用户权限:SHOW ALL GRANTS -9. 查看已创建的角色:SHOW ROLES -10. 查看用户属性:SHOW PROPERTY - -关于以上命令的详细帮助,可以通过 mysql 客户端连接 Doris 后,使用 help + command 获取帮助。如 `HELP CREATE USER`。 - -## 权限类型 - -Doris 目前支持以下几种权限 - -1. Node_priv - - 节点变更权限。包括 FE、BE、BROKER 节点的添加、删除、下线等操作。目前该权限只能授予 Root 用户。 - -2. Grant_priv - - 权限变更权限。允许执行包括授权、撤权、添加/删除/变更 用户/角色 等操作。 - -3. Select_priv - - 对数据库、表的只读权限。 - -4. Load_priv - - 对数据库、表的写权限。包括 Load、Insert、Delete 等。 - -5. Alter_priv - - 对数据库、表的更改权限。包括重命名 库/表、添加/删除/变更 列、添加/删除 分区等操作。 - -6. Create_priv - - 创建数据库、表、视图的权限。 - -7. Drop_priv - - 删除数据库、表、视图的权限。 - -8. Usage_priv - - 资源的使用权限。 - - -## 权限层级 - -同时,根据权限适用范围的不同,我们将库表的权限分为以下三个层级: - -1. GLOBAL LEVEL:全局权限。即通过 GRANT 语句授予的 `*.*` 上的权限。被授予的权限适用于任意数据库中的任意表。 -2. DATABASE LEVEL:数据库级权限。即通过 GRANT 语句授予的 `db.*` 上的权限。被授予的权限适用于指定数据库中的任意表。 -3. TABLE LEVEL:表级权限。即通过 GRANT 语句授予的 `db.tbl` 上的权限。被授予的权限适用于指定数据库中的指定表。 - -将资源的权限分为以下两个层级: - -1. GLOBAL LEVEL:全局权限。即通过 GRANT 语句授予的 `*` 上的权限。被授予的权限适用于资源。 -2. RESOURCE LEVEL: 资源级权限。即通过 GRANT 语句授予的 `resource_name` 上的权限。被授予的权限适用于指定资源。 - - -## ADMIN/GRANT 权限说明 - -ADMIN\_PRIV 和 GRANT\_PRIV 权限同时拥有**授予权限**的权限,较为特殊。这里对和这两个权限相关的操作逐一说明。 - -1. CREATE USER - - * 拥有 ADMIN 权限,或任意层级的 GRANT 权限的用户可以创建新用户。 - -2. DROP USER - - * 只有 ADMIN 权限可以删除用户。 - -3. CREATE/DROP ROLE - - * 只有 ADMIN 权限可以创建角色。 - -4. GRANT/REVOKE - - * 拥有 ADMIN 权限,或者 GLOBAL 层级 GRANT 权限的用户,可以授予或撤销任意用户的权限。 - * 拥有 DATABASE 层级 GRANT 权限的用户,可以授予或撤销任意用户对指定数据库的权限。 - * 拥有 TABLE 层级 GRANT 权限的用户,可以授予或撤销任意用户对指定数据库中指定表的权限。 - -5. SET PASSWORD - - * 拥有 ADMIN 权限,或者 GLOBAL 层级 GRANT 权限的用户,可以设置任意用户的密码。 - * 普通用户可以设置自己对应的 UserIdentity 的密码。自己对应的 UserIdentity 可以通过 `SELECT CURRENT_USER();` 命令查看。 - * 拥有非 GLOBAL 层级 GRANT 权限的用户,不可以设置已存在用户的密码,仅能在创建用户时指定密码。 - - -## 一些说明 - -1. Doris 初始化时,会自动创建如下用户和角色: - - 1. operator 角色:该角色拥有 Node\_priv 和 Admin\_priv,即对Doris的所有权限。后续某个升级版本中,我们可能会将该角色的权限限制为 Node\_priv,即仅授予节点变更权限。以满足某些云上部署需求。 - - 2. admin 角色:该角色拥有 Admin\_priv,即除节点变更以外的所有权限。 - - 3. root@'%':root 用户,允许从任意节点登陆,角色为 operator。 - - 4. admin@'%':admin 用户,允许从任意节点登陆,角色为 admin。 - -2. 不支持删除或更改默认创建的角色或用户的权限。 - -3. operator 角色的用户有且只有一个。admin 角色的用户可以创建多个。 - -4. 一些可能产生冲突的操作说明 - - 1. 
域名与ip冲突: - - 假设创建了如下用户: - - CREATE USER cmy@['domain']; - - 并且授权: - - GRANT SELECT_PRIV ON \*.\* TO cmy@['domain'] - - 该 domain 被解析为两个 ip:ip1 和 ip2 - - 假设之后,我们对 cmy@'ip1' 进行一次单独授权: - - GRANT ALTER_PRIV ON \*.\* TO cmy@'ip1'; - - 则 cmy@'ip1' 的权限会被修改为 SELECT\_PRIV, ALTER\_PRIV。并且当我们再次变更 cmy@['domain'] 的权限时,cmy@'ip1' 也不会跟随改变。 - - 2. 重复ip冲突: - - 假设创建了如下用户: - - CREATE USER cmy@'%' IDENTIFIED BY "12345"; - - CREATE USER cmy@'192.%' IDENTIFIED BY "abcde"; - - 在优先级上,'192.%' 优先于 '%',因此,当用户 cmy 从 192.168.1.1 这台机器尝试使用密码 '12345' 登陆 Doris 会被拒绝。 - -5. 忘记密码 - - 如果忘记了密码无法登陆 Doris,可以在 Doris FE 节点所在机器,使用如下命令无密码登陆 Doris: - - `mysql-client -h 127.0.0.1 -P query_port -uroot` - - 登陆后,可以通过 SET PASSWORD 命令重置密码。 - -6. 任何用户都不能重置 root 用户的密码,除了 root 用户自己。 - -7. ADMIN\_PRIV 权限只能在 GLOBAL 层级授予或撤销。 - -8. 拥有 GLOBAL 层级 GRANT_PRIV 其实等同于拥有 ADMIN\_PRIV,因为该层级的 GRANT\_PRIV 有授予任意权限的权限,请谨慎使用。 - -9. `current_user()` 和 `user()` - - 用户可以通过 `SELECT current_user();` 和 `SELECT user();` 分别查看 `current_user` 和 `user`。其中 `current_user` 表示当前用户是以哪种身份通过认证系统的,而 `user` 则是用户当前实际的 `user_identity`。举例说明: - - 假设创建了 `user1@'192.%'` 这个用户,然后以为来自 192.168.10.1 的用户 user1 登陆了系统,则此时的 `current_user` 为 `user1@'192.%'`,而 `user` 为 `user1@'192.168.10.1'`。 - - 所有的权限都是赋予某一个 `current_user` 的,真实用户拥有对应的 `current_user` 的所有权限。 - -## 最佳实践 - -这里举例一些 Doris 权限系统的使用场景。 - -1. 场景一 - - Doris 集群的使用者分为管理员(Admin)、开发工程师(RD)和用户(Client)。其中管理员拥有整个集群的所有权限,主要负责集群的搭建、节点管理等。开发工程师负责业务建模,包括建库建表、数据的导入和修改等。用户访问不同的数据库和表来获取数据。 - - 在这种场景下,可以为管理员赋予 ADMIN 权限或 GRANT 权限。对 RD 赋予对任意或指定数据库表的 CREATE、DROP、ALTER、LOAD、SELECT 权限。对 Client 赋予对任意或指定数据库表 SELECT 权限。同时,也可以通过创建不同的角色,来简化对多个用户的授权操作。 - -2. 场景二 - - 一个集群内有多个业务,每个业务可能使用一个或多个数据。每个业务需要管理自己的用户。在这种场景下。管理员用户可以为每个数据库创建一个拥有 DATABASE 层级 GRANT 权限的用户。该用户仅可以对用户进行指定的数据库的授权。 - -3. 黑名单 - - Doris 本身不支持黑名单,只有白名单功能,但我们可以通过某些方式来模拟黑名单。假设先创建了名为 `user@'192.%'` 的用户,表示允许来自 `192.*` 的用户登录。此时如果想禁止来自 `192.168.10.1` 的用户登录。则可以再创建一个用户 `cmy@'192.168.10.1'` 的用户,并设置一个新的密码。因为 `192.168.10.1` 的优先级高于 `192.%`,所以来自 `192.168.10.1` 将不能再使用旧密码进行登录。 - - diff --git a/docs/zh-CN/administrator-guide/query_cache.md b/docs/zh-CN/administrator-guide/query_cache.md deleted file mode 100644 index cb2bdb119d..0000000000 --- a/docs/zh-CN/administrator-guide/query_cache.md +++ /dev/null @@ -1,155 +0,0 @@ ---- -{ - "title": "QUERY CACHE", - "language": "zh-CN" -} ---- - - -# QUERY CACHE - -## 1 需求 - -虽然在数据库存储层也做了对应的缓存,但这种数据库存储层的缓存一般针对的是查询内容,而且粒度也太小,一般只有表中数据没有变更的时候,数据库对应的cache才发挥了作用。 但这并不能减少业务系统对数据库进行增删改查所带来的庞大的IO压力。所以数据库缓存技术在此诞生,实现热点数据的高速缓存,提高应用的响应速度,极大缓解后端数据库的压力 - -- 高并发场景 - Doris可以较好地支持高并发,但单台服务器无法承载太高的QPS - -- 复杂图表的看板 - 复杂的Dashboard或者大屏类应用,数据来自多张表,每个页面有数十个查询,虽然每个查询只有数十毫秒,但是总体查询时间会在数秒 - -- 趋势分析 - 给定日期范围的查询,指标按日显示,比如查询最近7天内的用户数的趋势,这类查询数据量大,查询范围广,查询时间往往需要数十秒 - -- 用户重复查询 - 如果产品没有防重刷机制,用户因手误或其他原因重复刷新页面,导致提交大量的重复的SQL - -以上四种场景,一种在应用层的解决方案是把查询结果放到Redis中,周期性地更新缓存或者用户手动刷新缓存,但是这个方案有如下问题: - -- 数据不一致 - 无法感知数据的更新,导致用户经常看到旧的数据 - -- 命中率低 - 缓存整个查询结果,如果数据实时写入,缓存频繁失效,命中率低且系统负载较重 - -- 额外成本 - 引入外部缓存组件,会带来系统复杂度,增加额外成本 - -## 2 解决方案 - -目前我们设计出结果缓存和分区缓存两个模块 - -## 3 名词解释 - -1. 结果缓存 result_cache - -针对用户的sql直接缓存查询的结果集合 - -2. 
分区缓存 partition_cache - -在partition粒度做针对每个分区查询的结果缓存 - -## 4 设计原理 - -### 1 结果缓存 `result_cache` - -result_cache 分两种 第一种为 result_cache_ttl 第二种为 result_cache_version - -#### `result_cache_ttl` - -result_cache_ttl 变量设置在用户Session中,用户可自定义是否开启,通过ttl时间来确定用户的sql是否使用缓存,`这里数据变更时不保证数据的正确性` -按照 用户 connectid,和查询的sql 来存储和获取缓存,超过缓存失效时间则命中不了缓存,该缓存也会被清理 - -#### ` result_cache_version` - -result_cache_version 按SQL的签名、查询的表的分区ID、分区最新版本来存储和获取缓存。三者组合确定一个缓存数据集,任何一个变化了,如SQL有变化,如查询字段或条件不一样,或数据更新后版本变化了,会导致命中不了缓存。 - -如果多张表Join,使用最近更新的分区ID和最新的版本号,如果其中一张表更新了,会导致分区ID或版本号不一样,也一样命中不了缓存。 - -### 2 分区缓存 `partition_cache` - -1. SQL可以并行拆分,Q = Q1 ∪ Q2 ... ∪ Qn,R= R1 ∪ R2 ... ∪ Rn,Q为查询语句,R为结果集 -2. 拆分为只读分区和可更新分区,只读分区缓存,更新分区不缓存 - -### 5 使用场景 - -|缓存类型|使用场景| -|--|--| -|result_cache_ttl|主要解决高QPS,用户重复查询的场景| -|result_cache_version|主要解决整张表长时间没有变更的场景| -|partition_cache|主要解决历史分区不变更的场景| - -## 6 参数 - -### fe - -#### cache 开关 - -1. `enable_result_cache_ttl` -- 解释: enable_result_cache_ttl 开关 -- 默认值:false - -2. `enable_result_cache_version` -- 解释:结果集缓存针对table版本的的开关 -- 默认值:false - -- `enable_partition_cache` -- 解释:分区缓存 开关 -- 默认值:false - -#### 每个查询是否缓存的限制 - -1. `cache_per_query_max_row_count` -- 缓存每个查询最大的行数 -- 默认值 3000 - -2. `cache_per_query_max_size_in_bytes` -- 缓存每次查询的大小,单位bytes -- 默认值 1Mb - -3. `result_cache_ttl_in_milliseconds` -- result cache 缓存时长 -- 默认值 3s - -### be - -1. `cache_max_partition_count` -- parition cache 最大缓存分区数 -- 默认值:1024 - -2. `cache_max_size_in_mb` `cache_elasticity_size_in_mb` -- BE中缓存内存设置,有两个参数cache_max_size_in_mb和cache_elasticity_size_in_mb),内存超过cache_max_size_in_mb+cache_elasticity_size_in_mb会开始清理,并把内存控制到cache_max_size_in_mb以下。可以根据BE节点数量,节点内存大小,和缓存命中率来设置这两个参数。 - -## 7 如何使用 - -- use enable_result_cache_ttl -``` -set `global` enable_result_cache_ttl =true -``` - -- use enable_result_cache_version -``` -set `global` enable_result_cache_version = true -``` - -- use enable_partition_cache -``` -set `global` enable_partition_cache = true -``` diff --git a/docs/zh-CN/administrator-guide/resource-management.md b/docs/zh-CN/administrator-guide/resource-management.md deleted file mode 100644 index 897fb53e80..0000000000 --- a/docs/zh-CN/administrator-guide/resource-management.md +++ /dev/null @@ -1,170 +0,0 @@ ---- -{ - "title": "资源管理", - "language": "zh-CN" -} ---- - - - -# 资源管理 - -为了节省Doris集群内的计算、存储资源,Doris需要引入一些其他外部资源来完成相关的工作,如Spark/GPU用于查询,HDFS/S3用于外部存储,Spark/MapReduce用于ETL, 通过ODBC连接外部存储等,因此我们引入资源管理机制来管理Doris使用的这些外部资源。 - - - -## 基本概念 - -一个资源包含名字、类型等基本信息,名字为全局唯一,不同类型的资源包含不同的属性,具体参考各资源的介绍。 - -资源的创建和删除只能由拥有 `admin` 权限的用户进行操作。一个资源隶属于整个Doris集群。拥有 `admin` 权限的用户可以将使用权限`usage_priv` 赋给普通用户。可参考`HELP GRANT`或者权限文档。 - - - -## 具体操作 - -资源管理主要有三个命令:`CREATE RESOURCE`,`DROP RESOURCE` 和 `SHOW RESOURCES`,分别为创建、删除和查看资源。这三个命令的具体语法可以通过MySQL客户端连接到 Doris 后,执行 `HELP cmd` 的方式查看帮助。 - -1. CREATE RESOURCE - - 语法 - - ```sql - CREATE [EXTERNAL] RESOURCE "resource_name" - PROPERTIES ("key"="value", ...); - ``` - - 在创建资源的命令中,用户必须提供以下信息: - - * `resource_name` 为 Doris 中配置的资源的名字。 - * `PROPERTIES` 是资源相关参数,如下: - * `type`:资源类型,必填,目前仅支持 spark与odbc_catalog。 - * 其他参数见各资源介绍。 - -2. DROP RESOURCE - - 该命令可以删除一个已存在的资源。具体操作见:`HELP DROP RESOURCE` - -3. 
SHOW RESOURCES - - 该命令可以查看用户有使用权限的资源。具体操作见:`HELP SHOW RESOURCES` - - - -## 支持的资源 - -目前Doris能够支持 -* Spark资源 : 完成ETL工作。 -* ODBC资源:查询和导入外部表的数据 - -下面将分别展示两种资源的使用方式。 - -### Spark - -#### 参数 - -##### Spark 相关参数如下: - -`spark.master`: 必填,目前支持yarn,spark://host:port。 - -`spark.submit.deployMode`: Spark 程序的部署模式,必填,支持 cluster,client 两种。 - -`spark.hadoop.yarn.resourcemanager.address`: master为yarn时必填。 - -`spark.hadoop.fs.defaultFS`: master为yarn时必填。 - -其他参数为可选,参考http://spark.apache.org/docs/latest/configuration.html。 - - - -##### 如果Spark用于ETL,还需要指定以下参数: - -`working_dir`: ETL 使用的目录。spark作为ETL资源使用时必填。例如:hdfs://host:port/tmp/doris。 - -`broker`: broker 名字。spark作为ETL资源使用时必填。需要使用`ALTER SYSTEM ADD BROKER` 命令提前完成配置。 - - * `broker.property_key`: broker读取ETL生成的中间文件时需要指定的认证信息等。 - - - -#### 示例 - -创建 yarn cluster 模式,名为 spark0 的 Spark 资源。 - -```sql -CREATE EXTERNAL RESOURCE "spark0" -PROPERTIES -( - "type" = "spark", - "spark.master" = "yarn", - "spark.submit.deployMode" = "cluster", - "spark.jars" = "xxx.jar,yyy.jar", - "spark.files" = "/tmp/aaa,/tmp/bbb", - "spark.executor.memory" = "1g", - "spark.yarn.queue" = "queue0", - "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999", - "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000", - "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris", - "broker" = "broker0", - "broker.username" = "user0", - "broker.password" = "password0" -); -``` - -### ODBC - -#### 参数 - -##### ODBC 相关参数如下: - -`type`: 必填,且必须为`odbc_catalog`。作为resource的类型标识。 - -`user`: 外部表的账号,必填。 - -`password`: 外部表的密码,必填。 - -`host`: 外部表的连接ip地址,必填。 - -`port`: 外部表的连接端口,必填。 - -`odbc_type`: 标示外部表的类型,当前doris支持`mysql`与`oracle`,未来可能支持更多的数据库。引用该resource的ODBC外表必填,旧的mysql外表选填。 - -`driver`: 标示外部表使用的driver动态库,引用该resource的ODBC外表必填,旧的mysql外表选填。 - - -具体如何使用可以,可以参考[ODBC of Doris](../extending-doris/odbc-of-doris.html) - -#### 示例 - -创建oracle的odbc resource,名为 odbc_oracle 的 odbc_catalog的 资源。 - -```sql -CREATE EXTERNAL RESOURCE `oracle_odbc` -PROPERTIES ( -"type" = "odbc_catalog", -"host" = "192.168.0.1", -"port" = "8086", -"user" = "test", -"password" = "test", -"database" = "test", -"odbc_type" = "oracle", -"driver" = "Oracle 19 ODBC driver" -); -``` \ No newline at end of file diff --git a/docs/zh-CN/administrator-guide/runtime-filter.md b/docs/zh-CN/administrator-guide/runtime-filter.md deleted file mode 100644 index ca9f908ed9..0000000000 --- a/docs/zh-CN/administrator-guide/runtime-filter.md +++ /dev/null @@ -1,282 +0,0 @@ ---- -{ - "title": "Runtime Filter", - "language": "zh-CN" -} ---- - - - -# Runtime Filter - -Runtime Filter 是在 Doris 0.15 版本中正式加入的新功能。旨在为某些 Join 查询在运行时动态生成过滤条件,来减少扫描的数据量,避免不必要的I/O和网络传输,从而加速查询。 - -它的设计、实现和效果可以参阅 [ISSUE 6116](https://github.com/apache/incubator-doris/issues/6116)。 - -## 名词解释 - -* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。 -* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。 -* 左表:Join查询时,左边的表。进行Probe操作。可被Join Reorder调整顺序。 -* 右表:Join查询时,右边的表。进行Build操作。可被Join Reorder调整顺序。 -* Fragment:FE会将具体的SQL语句的执行转化为对应的Fragment并下发到BE进行执行。BE上执行对应Fragment,并将结果汇聚返回给FE。 -* Join on clause: `A join B on A.a=B.b`中的`A.a=B.b`,在查询规划时基于此生成join conjuncts,包含join Build和Probe使用的expr,其中Build expr在Runtime Filter中称为src expr,Probe expr在Runtime Filter中称为target expr。 - -## 原理 - -Runtime Filter在查询规划时生成,在HashJoinNode中构建,在ScanNode中应用。 - -举个例子,当前存在T1表与T2表的Join查询,它的Join方式为HashJoin,T1是一张事实表,数据行数为100000,T2是一张维度表,数据行数为2000,Doris join的实际情况是: -``` -| > HashJoinNode < -| | | -| | 100000 | 2000 -| | | -| OlapScanNode OlapScanNode -| ^ ^ -| | 100000 | 2000 -| T1 T2 -| -``` 
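Below is a minimal sketch of the query shape behind the diagram above; the join-key column name `k1` is an assumption made for illustration and does not come from the original text:

```sql
-- T1 is the fact table (~100000 rows), T2 the dimension table (~2000 rows).
-- In the plan above both tables are scanned in full, so all 100000 rows of T1
-- reach the probe side of the HashJoinNode; the diagrams that follow show how
-- a runtime filter built from T2 reduces the rows scanned and probed from T1.
SELECT count(*)
FROM T1 JOIN T2 ON T1.k1 = T2.k1;
```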
-显而易见对T2扫描数据要远远快于T1,如果我们主动等待一段时间再扫描T1,等T2将扫描的数据记录交给HashJoinNode后,HashJoinNode根据T2的数据计算出一个过滤条件,比如T2数据的最大和最小值,或者构建一个Bloom Filter,接着将这个过滤条件发给等待扫描T1的ScanNode,后者应用这个过滤条件,将过滤后的数据交给HashJoinNode,从而减少probe hash table的次数和网络开销,这个过滤条件就是Runtime Filter,效果如下: -``` -| > HashJoinNode < -| | | -| | 6000 | 2000 -| | | -| OlapScanNode OlapScanNode -| ^ ^ -| | 100000 | 2000 -| T1 T2 -| -``` -如果能将过滤条件(Runtime Filter)下推到存储引擎,则某些情况下可以利用索引来直接减少扫描的数据量,从而大大减少扫描耗时,效果如下: -``` -| > HashJoinNode < -| | | -| | 6000 | 2000 -| | | -| OlapScanNode OlapScanNode -| ^ ^ -| | 6000 | 2000 -| T1 T2 -| -``` -可见,和谓词下推、分区裁剪不同,Runtime Filter是在运行时动态生成的过滤条件,即在查询运行时解析join on clause确定过滤表达式,并将表达式广播给正在读取左表的ScanNode,从而减少扫描的数据量,进而减少probe hash table的次数,避免不必要的I/O和网络传输。 - -Runtime Filter主要用于大表join小表的优化,如果左表的数据量太小,或者右表的数据量太大,则Runtime Filter可能不会取得预期效果。 - -## 使用方式 - -### Runtime Filter查询选项 - -与Runtime Filter相关的查询选项信息,请参阅以下部分: - -- 第一个查询选项是调整使用的Runtime Filter类型,大多数情况下,您只需要调整这一个选项,其他选项保持默认即可。 - - - `runtime_filter_type`: 包括Bloom Filter、MinMax Filter、IN predicate、IN Or Bloom Filter,默认会使用IN Or Bloom Filter,部分情况下同时使用Bloom Filter、MinMax Filter、IN predicate时性能更高。 - -- 其他查询选项通常仅在某些特定场景下,才需进一步调整以达到最优效果。通常只在性能测试后,针对资源密集型、运行耗时足够长且频率足够高的查询进行优化。 - - - `runtime_filter_mode`: 用于调整Runtime Filter的下推策略,包括OFF、LOCAL、GLOBAL三种策略,默认设置为GLOBAL策略 - - - `runtime_filter_wait_time_ms`: 左表的ScanNode等待每个Runtime Filter的时间,默认1000ms - - - `runtime_filters_max_num`: 每个查询可应用的Runtime Filter中Bloom Filter的最大数量,默认10 - - - `runtime_bloom_filter_min_size`: Runtime Filter中Bloom Filter的最小长度,默认1048576(1M) - - - `runtime_bloom_filter_max_size`: Runtime Filter中Bloom Filter的最大长度,默认16777216(16M) - - - `runtime_bloom_filter_size`: Runtime Filter中Bloom Filter的默认长度,默认2097152(2M) - - - `runtime_filter_max_in_num`: 如果join右表数据行数大于这个值,我们将不生成IN predicate,默认1024 - -下面对查询选项做进一步说明。 - -#### 1.runtime_filter_type -使用的Runtime Filter类型。 - -**类型**: 数字(1, 2, 4, 8)或者相对应的助记符字符串(IN, BLOOM_FILTER, MIN_MAX, ```IN_OR_BLOOM_FILTER```),默认8(```IN_OR_BLOOM_FILTER```),使用多个时用逗号分隔,注意需要加引号,或者将任意多个类型的数字相加,例如: -``` -set runtime_filter_type="BLOOM_FILTER,IN,MIN_MAX"; -``` -等价于: -``` -set runtime_filter_type=7; -``` - -**使用注意事项** - -- **IN or Bloom Filter**: 根据右表在执行过程中的真实行数,由系统自动判断使用 IN predicate 还是 Bloom Filter - - 默认在右表数据行数少于1024时会使用IN predicate(可通过session变量中的`runtime_filter_max_in_num`调整,否则使用Bloom filter。 -- **Bloom Filter**: 有一定的误判率,导致过滤的数据比预期少一点,但不会导致最终结果不准确,在大部分情况下Bloom Filter都可以提升性能或对性能没有显著影响,但在部分情况下会导致性能降低。 - - Bloom Filter构建和应用的开销较高,所以当过滤率较低时,或者左表数据量较少时,Bloom Filter可能会导致性能降低。 - - 目前只有左表的Key列应用Bloom Filter才能下推到存储引擎,而测试结果显示Bloom Filter不下推到存储引擎时往往会导致性能降低。 - - 目前Bloom Filter仅在ScanNode上使用表达式过滤时有短路(short-circuit)逻辑,即当假阳性率过高时,不继续使用Bloom Filter,但当Bloom Filter下推到存储引擎后没有短路逻辑,所以当过滤率较低时可能导致性能降低。 - -- **MinMax Filter**: 包含最大值和最小值,从而过滤小于最小值和大于最大值的数据,MinMax Filter的过滤效果与join on clause中Key列的类型和左右表数据分布有关。 - - 当join on clause中Key列的类型为int/bigint/double等时,极端情况下,如果左右表的最大最小值相同则没有效果,反之右表最大值小于左表最小值,或右表最小值大于左表最大值,则效果最好。 - - 当join on clause中Key列的类型为varchar等时,应用MinMax Filter往往会导致性能降低。 - -- **IN predicate**: 根据join on clause中Key列在右表上的所有值构建IN predicate,使用构建的IN predicate在左表上过滤,相比Bloom Filter构建和应用的开销更低,在右表数据量较少时往往性能更高。 - - 默认只有右表数据行数少于1024才会下推(可通过session变量中的`runtime_filter_max_in_num`调整)。 - - 目前IN predicate已实现合并方法。 - - 当同时指定In predicate和其他filter,并且in的过滤数值没达到runtime_filter_max_in_num时,会尝试把其他filter去除掉。原因是In predicate是精确的过滤条件,即使没有其他filter也可以高效过滤,如果同时使用则其他filter会做无用功。目前仅在Runtime filter的生产者和消费者处于同一个fragment时才会有去除非in filter的逻辑。 - -#### 2.runtime_filter_mode -用于控制Runtime Filter在instance之间传输的范围。 - -**类型**: 数字(0, 1, 2)或者相对应的助记符字符串(OFF, LOCAL, 
GLOBAL),默认2(GLOBAL)。 - -**使用注意事项** - -LOCAL:相对保守,构建的Runtime Filter只能在同一个instance(查询执行的最小单元)上同一个Fragment中使用,即Runtime Filter生产者(构建Filter的HashJoinNode)和消费者(使用RuntimeFilter的ScanNode)在同一个Fragment,比如broadcast join的一般场景; - -GLOBAL:相对激进,除满足LOCAL策略的场景外,还可以将Runtime Filter合并后通过网络传输到不同instance上的不同Fragment中使用,比如Runtime Filter生产者和消费者在不同Fragment,比如shuffle join。 - -大多数情况下GLOBAL策略可以在更广泛的场景对查询进行优化,但在有些shuffle join中生成和合并Runtime Filter的开销超过给查询带来的性能优势,可以考虑更改为LOCAL策略。 - -如果集群中涉及的join查询不会因为Runtime Filter而提高性能,您可以将设置更改为OFF,从而完全关闭该功能。 - -在不同Fragment上构建和应用Runtime Filter时,需要合并Runtime Filter的原因和策略可参阅 [ISSUE 6116](https://github.com/apache/incubator-doris/issues/6116) - -#### 3.runtime_filter_wait_time_ms -Runtime Filter的等待耗时。 - -**类型**: 整数,默认1000,单位ms - -**使用注意事项** - -在开启Runtime Filter后,左表的ScanNode会为每一个分配给自己的Runtime Filter等待一段时间再扫描数据,即如果ScanNode被分配了3个Runtime Filter,那么它最多会等待3000ms。 - -因为Runtime Filter的构建和合并均需要时间,ScanNode会尝试将等待时间内到达的Runtime Filter下推到存储引擎,如果超过等待时间后,ScanNode会使用已经到达的Runtime Filter直接开始扫描数据。 - -如果Runtime Filter在ScanNode开始扫描之后到达,则ScanNode不会将该Runtime Filter下推到存储引擎,而是对已经从存储引擎扫描上来的数据,在ScanNode上基于该Runtime Filter使用表达式过滤,之前已经扫描的数据则不会应用该Runtime Filter,这样得到的中间数据规模会大于最优解,但可以避免严重的裂化。 - -如果集群比较繁忙,并且集群上有许多资源密集型或长耗时的查询,可以考虑增加等待时间,以避免复杂查询错过优化机会。如果集群负载较轻,并且集群上有许多只需要几秒的小查询,可以考虑减少等待时间,以避免每个查询增加1s的延迟。 - -#### 4.runtime_filters_max_num -每个查询生成的Runtime Filter中Bloom Filter数量的上限。 - -**类型**: 整数,默认10 - -**使用注意事项** -目前仅对Bloom Filter的数量进行限制,因为相比MinMax Filter和IN predicate,Bloom Filter构建和应用的代价更高。 - -如果生成的Bloom Filter超过允许的最大数量,则保留选择性大的Bloom Filter,选择性大意味着预期可以过滤更多的行。这个设置可以防止Bloom Filter耗费过多的内存开销而导致潜在的问题。 -``` -选择性=(HashJoinNode Cardinality / HashJoinNode left child Cardinality) --- 因为目前FE拿到Cardinality不准,所以这里Bloom Filter计算的选择性与实际不准,因此最终可能只是随机保留了部分Bloom Filter。 -``` -仅在对涉及大表间join的某些长耗时查询进行调优时,才需要调整此查询选项。 - -#### 5.Bloom Filter长度相关参数 -包括`runtime_bloom_filter_min_size`、`runtime_bloom_filter_max_size`、`runtime_bloom_filter_size`,用于确定Runtime Filter使用的Bloom Filter数据结构的大小(以字节为单位)。 - -**类型**: 整数 - -**使用注意事项** -因为需要保证每个HashJoinNode构建的Bloom Filter长度相同才能合并,所以目前在FE查询规划时计算Bloom Filter的长度。 - -如果能拿到join右表统计信息中的数据行数(Cardinality),会尝试根据Cardinality估计Bloom Filter的最佳大小,并四舍五入到最接近的2的幂(以2为底的log值)。如果无法拿到右表的Cardinality,则会使用默认的Bloom Filter长度`runtime_bloom_filter_size`。`runtime_bloom_filter_min_size`和`runtime_bloom_filter_max_size`用于限制最终使用的Bloom Filter长度最小和最大值。 - -更大的Bloom Filter在处理高基数的输入集时更有效,但需要消耗更多的内存。假如查询中需要过滤高基数列(比如含有数百万个不同的取值),可以考虑增加`runtime_bloom_filter_size`的值进行一些基准测试,这有助于使Bloom Filter过滤的更加精准,从而获得预期的性能提升。 - -Bloom Filter的有效性取决于查询的数据分布,因此通常仅对一些特定查询额外调整其Bloom Filter长度,而不是全局修改,一般仅在对涉及大表间join的某些长耗时查询进行调优时,才需要调整此查询选项。 - -### 查看query生成的Runtime Filter - -`explain`命令可以显示的查询计划中包括每个Fragment使用的join on clause信息,以及Fragment生成和使用Runtime Filter的注释,从而确认是否将Runtime Filter应用到了期望的join on clause上。 -- 生成Runtime Filter的Fragment包含的注释例如`runtime filters: filter_id[type] <- table.column`。 -- 使用Runtime Filter的Fragment包含的注释例如`runtime filters: filter_id[type] -> table.column`。 - -下面例子中的查询使用了一个ID为RF000的Runtime Filter。 -``` -CREATE TABLE test (t1 INT) DISTRIBUTED BY HASH (t1) BUCKETS 2 PROPERTIES("replication_num" = "1"); -INSERT INTO test VALUES (1), (2), (3), (4); - -CREATE TABLE test2 (t2 INT) DISTRIBUTED BY HASH (t2) BUCKETS 2 PROPERTIES("replication_num" = "1"); -INSERT INTO test2 VALUES (3), (4), (5); - -EXPLAIN SELECT t1 FROM test JOIN test2 where test.t1 = test2.t2; -+-------------------------------------------------------------------+ -| Explain String | -+-------------------------------------------------------------------+ -| PLAN FRAGMENT 0 | -| OUTPUT EXPRS:`t1` | -| | -| 
4:EXCHANGE | -| | -| PLAN FRAGMENT 1 | -| OUTPUT EXPRS: | -| PARTITION: HASH_PARTITIONED: `default_cluster:ssb`.`test`.`t1` | -| | -| 2:HASH JOIN | -| | join op: INNER JOIN (BUCKET_SHUFFLE) | -| | equal join conjunct: `test`.`t1` = `test2`.`t2` | -| | runtime filters: RF000[in] <- `test2`.`t2` | -| | | -| |----3:EXCHANGE | -| | | -| 0:OlapScanNode | -| TABLE: test | -| runtime filters: RF000[in] -> `test`.`t1` | -| | -| PLAN FRAGMENT 2 | -| OUTPUT EXPRS: | -| PARTITION: HASH_PARTITIONED: `default_cluster:ssb`.`test2`.`t2` | -| | -| 1:OlapScanNode | -| TABLE: test2 | -+-------------------------------------------------------------------+ --- 上面`runtime filters`的行显示了`PLAN FRAGMENT 1`的`2:HASH JOIN`生成了ID为RF000的IN predicate, --- 其中`test2`.`t2`的key values仅在运行时可知, --- 在`0:OlapScanNode`使用了该IN predicate用于在读取`test`.`t1`时过滤不必要的数据。 - -SELECT t1 FROM test JOIN test2 where test.t1 = test2.t2; --- 返回2行结果[3, 4]; - --- 通过query的profile(set enable_profile=true;)可以查看查询内部工作的详细信息, --- 包括每个Runtime Filter是否下推、等待耗时、以及OLAP_SCAN_NODE从prepare到接收到Runtime Filter的总时长。 -RuntimeFilter:in: - - HasPushDownToEngine: true - - AWaitTimeCost: 0ns - - EffectTimeCost: 2.76ms - --- 此外,在profile的OLAP_SCAN_NODE中还可以查看Runtime Filter下推后的过滤效果和耗时。 - - RowsVectorPredFiltered: 9.320008M (9320008) - - VectorPredEvalTime: 364.39ms -``` - -## Runtime Filter的规划规则 -1. 只支持对join on clause中的等值条件生成Runtime Filter,不包括Null-safe条件,因为其可能会过滤掉join左表的null值。 -2. 不支持将Runtime Filter下推到left outer、full outer、anti join的左表; -3. 不支持src expr或target expr是常量; -4. 不支持src expr和target expr相等; -5. 不支持src expr的类型等于`HLL`或者`BITMAP`; -6. 目前仅支持将Runtime Filter下推给OlapScanNode; -7. 不支持target expr包含NULL-checking表达式,比如`COALESCE/IFNULL/CASE`,因为当outer join上层其他join的join on clause包含NULL-checking表达式并生成Runtime Filter时,将这个Runtime Filter下推到outer join的左表时可能导致结果不正确; -8. 不支持target expr中的列(slot)无法在原始表中找到某个等价列; -9. 不支持列传导,这包含两种情况: - - 一是例如join on clause包含A.k = B.k and B.k = C.k时,目前C.k只可以下推给B.k,而不可以下推给A.k; - - 二是例如join on clause包含A.a + B.b = C.c,如果A.a可以列传导到B.a,即A.a和B.a是等价的列,那么可以用B.a替换A.a,然后可以尝试将Runtime Filter下推给B(如果A.a和B.a不是等价列,则不能下推给B,因为target expr必须与唯一一个join左表绑定); -10. Target expr和src expr的类型必须相等,因为Bloom Filter基于hash,若类型不等则会尝试将target expr的类型转换为src expr的类型; -11. 不支持`PlanNode.Conjuncts`生成的Runtime Filter下推,与HashJoinNode的`eqJoinConjuncts`和`otherJoinConjuncts`不同,`PlanNode.Conjuncts`生成的Runtime Filter在测试中发现可能会导致错误的结果,例如`IN`子查询转换为join时,自动生成的join on clause将保存在`PlanNode.Conjuncts`中,此时应用Runtime Filter可能会导致结果缺少一些行。 diff --git a/docs/zh-CN/administrator-guide/segment-v2-usage.md b/docs/zh-CN/administrator-guide/segment-v2-usage.md deleted file mode 100644 index 1309182587..0000000000 --- a/docs/zh-CN/administrator-guide/segment-v2-usage.md +++ /dev/null @@ -1,157 +0,0 @@ ---- -{ - "title": "Segment V2 升级手册", - "language": "zh-CN" -} ---- - - - -# Segment V2 升级手册 - -## 背景 - -Doris 0.12 版本中实现了新的存储格式:Segment V2,引入词典压缩、bitmap索引、page cache等优化,能够提升系统性能。 - -0.12 版本会同时支持读写原有的 Segment V1(以下简称V1) 和新的 Segment V2(以下简称V2) 两种格式。如果原有数据想使用 V2 相关特性,需通过命令将 V1 转换成 V2 格式。 - -本文档主要介绍从 0.11 版本升级至 0.12 版本后,如何转换和使用 V2 格式。 - -V2 格式的表可以支持以下新的特性: - -1. bitmap 索引 -2. 内存表 -3. page cache -4. 字典压缩 -5. 
延迟物化(Lazy Materialization) - -**从 0.13 版本开始,新建表的默认存储格式将为 Segment V2** - -## 集群升级 - -0.12 版本仅支持从 0.11 版本升级,不支持从 0.11 之前的版本升级。请先确保升级的前的 Doris 集群版本为 0.11。 - -0.12 版本有两个 V2 相关的重要参数: - -* `default_rowset_type`:FE 一个全局变量(Global Variable)设置,默认为 "alpha",即 V1 版本。 -* `default_rowset_type`:BE 的一个配置项,默认为 "ALPHA",即 V1 版本。 - -保持上述配置默认的话,按常规步骤对集群升级后,原有集群数据的存储格式不会变更,即依然为 V1 格式。如果对 V2 格式没有需求,则继续正常使用集群即可,无需做任何额外操作。所有原有数据、以及新导入的数据,都依然是 V1 版本。 - -## V2 格式转换 - -### 已有表数据转换成 V2 - -对于已有表数据的格式转换,Doris 提供两种方式: - -1. 创建一个 V2 格式的特殊 Rollup - - 该方式会针对指定表,创建一个 V2 格式的特殊 Rollup。创建完成后,新的 V2 格式的 Rollup 会和原有表格式数据并存。用户可以指定对 V2 格式的 Rollup 进行查询验证。 - - 该方式主要用于对 V2 格式的验证,因为不会修改原有表数据,因此可以安全的进行 V2 格式的数据验证,而不用担心表数据因格式转换而损坏。通常先使用这个方式对数据进行校验,之后再使用方法2对整个表进行数据格式转换。 - - 操作步骤如下: - - ``` - ## 创建 V2 格式的 Rollup - - ALTER TABLE table_name ADD ROLLUP table_name (columns) PROPERTIES ("storage_format" = "v2"); - ``` - - 其中, Rollup 的名称必须为表名。columns 字段可以任意填写,系统不会检查该字段的合法性。该语句会自动生成一个名为 `__V2_table_name` 的 Rollup,并且该 Rollup 列包含表的全部列。 - - 通过以下语句查看创建进度: - - ``` - SHOW ALTER TABLE ROLLUP; - ``` - - 创建完成后,可以通过 `DESC table_name ALL;` 查看到名为 `__v2_table_name` 的 Rollup。 - - 之后,通过如下命令,切换到 V2 格式查询: - - ``` - set use_v2_rollup = true; - select * from table_name limit 10; - ``` - - `use_V2_Rollup` 这个变量会强制查询名为 `__V2_table_name` 的 Rollup,并且不会考虑其他 Rollup 的命中条件。所以该参数仅用于对 V2 格式数据进行验证。 - -2. 转换现有表数据格式 - - 该方式相当于给指定的表发送一个 schema change 作业,作业完成后,表的所有数据会被转换成 V2 格式。该方法不会保留原有 v1 格式,所以请先使用方法1进行格式验证。 - - ``` - ALTER TABLE table_name SET ("storage_format" = "v2"); - ``` - - 之后通过如下命令查看作业进度: - - ``` - SHOW ALTER TABLE COLUMN; - ``` - - 作业完成后,该表的所有数据(包括Rollup)都转换为了 V2。且 V1 版本的数据已被删除。如果该表是分区表,则之后创建的分区也都是 V2 格式。 - - **V2 格式的表不能重新转换为 V1** - -### 创建新的 V2 格式的表 - -在不改变默认配置参数的情况下,用户可以创建 V2 格式的表: - -``` -CREATE TABLE tbl_name -( - k1 INT, - k2 INT -) -DISTRIBUTED BY HASH(k1) BUCKETS 1 -PROPERTIES -( - "storage_format" = "v2" -); -``` - -在 `properties` 中指定 `"storage_format" = "v2"` 后,该表将使用 V2 格式创建。如果是分区表,则之后创建的分区也都是 V2 格式。 - -### 全量格式转换(试验功能,不推荐) - -通过以下方式可以开启整个集群的全量数据格式转换(V1 -> V2)。全量数据转换是通过 BE 后台的数据 compaction 过程异步进行的。 -**该功能目前并没有很好的方式查看或控制转换进度,并且无法保证数据能够转换完成。可能导致同一张表长期处于同时包含两种数据格式的状态。因此建议使用 ALTER TABLE 针对性的转换。** - -1. 从 BE 开启全量格式转换 - - 在 `be.conf` 中添加变量 `default_rowset_type=BETA` 并重启 BE 节点。在之后的 compaction 流程中,数据会自动从 V1 转换成 V2。 - -2. 从 FE 开启全量格式转换 - - 通过 mysql 客户端连接 Doris 后,执行如下语句: - - `SET GLOBAL default_rowset_type = beta;` - - 执行完成后,FE 会通过心跳将信息发送给 BE,之后 BE 的 compaction 流程中,数据会自动从 V1 转换成 V2。 - - FE 的配置参数优先级高于 BE 的配置。即使 BE 中的配置 `default_rowset_type` 为 ALPHA,如果 FE 配置为 beta 后,则 BE 依然开始进行 V1 到 V2 的数据格式转换。 - - **建议先通过对单独表的数据格式转换验证后,再进行全量转换。全量转换的时间比较长,且进度依赖于 compaction 的进度。**可能出现 compaction 无法完成的情况,因此需要通过显式的执行 `ALTER TABLE` 操作进行个别表的数据格式转换。 - -3. 
查看全量转换进度 - - 全量转换进度须通过脚本查看。脚本位置为代码库的 `tools/show_segment_status/` 目录。请参阅其中的 `README` 文档查看使用帮助。 diff --git a/docs/zh-CN/administrator-guide/small-file-mgr.md b/docs/zh-CN/administrator-guide/small-file-mgr.md deleted file mode 100644 index d107e5e639..0000000000 --- a/docs/zh-CN/administrator-guide/small-file-mgr.md +++ /dev/null @@ -1,104 +0,0 @@ ---- -{ - "title": "文件管理器", - "language": "zh-CN" -} ---- - - - -# 文件管理器 - -Doris 中的一些功能需要使用一些用户自定义的文件。比如用于访问外部数据源的公钥、密钥文件、证书文件等等。文件管理器提供这样一个功能,能够让用户预先上传这些文件并保存在 Doris 系统中,然后可以在其他命令中引用或访问。 - -## 名词解释 - -* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。 -* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。 -* BDBJE:Oracle Berkeley DB Java Edition。FE 中用于持久化元数据的分布式嵌入式数据库。 -* SmallFileMgr:文件管理器。负责创建并维护用户的文件。 - -## 基本概念 - -文件是指用户创建并保存在 Doris 中的文件。 - -一个文件由 `数据库名称(database)`、`分类(catalog)` 和 `文件名(file_name)` 共同定位。同时每个文件也有一个全局唯一的 id(file_id),作为系统内的标识。 - -文件的创建和删除只能由拥有 `admin` 权限的用户进行操作。一个文件隶属于一个数据库。对某一数据库拥有访问权限(查询、导入、修改等等)的用户都可以使用该数据库下创建的文件。 - -## 具体操作 - -文件管理主要有三个命令:`CREATE FILE`,`SHOW FILE` 和 `DROP FILE`,分别为创建、查看和删除文件。这三个命令的具体语法可以通过连接到 Doris 后,执行 `HELP cmd;` 的方式查看帮助。 - -1. CREATE FILE - - 在创建文件的命令中,用户必须提供以下信息: - - * file_name:文件名。用户自定义,在一个 catalog 内唯一即可。 - * catalog:文件所属分类。用户自定义,在一个 database 内唯一即可。 - - > Doris 也有一些特殊的分类名称供特定的命令使用。 - - > 1. kafka - - > 当在例行导入命令中指定数据源为 Kafka,并且需要引用到文件时,Doris 会默认从 catalog 名为 "kafka" 的分类中查找文件。 - - * url:文件的下载地址。目前仅支持无认证的 http 下载地址。该下载地址仅用于在执行创建文件命令时,从这个地址下载文件。当文件成功创建并保存在 Doris 中后,该地址将不再被使用。 - * md5:可选项。文件的 MD5 值。如果用户提供该值,将在文件下载后进行 MD5 值的校验。校验失败则文件创建失败。 - - 文件创建成功后,文件相关的信息将持久化在 Doris 中。用户可以通过 `SHOW FILE` 命令查看已经创建成功的文件。 - -2. SHOW FILE - - 该命令可以查看已经创建成功的文件。具体操作见:`HELP SHOW FILE;` - -3. DROP FILE - - 该命令可以删除一个已经创建的文件。具体操作见:`HELP DROP FILE;` - -## 实现细节 - -### 创建和删除文件 - -当用户执行 `CREATE FILE` 命令后,FE 会从给定的 URL 下载文件。并将文件的内容以 Base64 编码的形式直接保存在 FE 的内存中。同时会将文件内容以及文件相关的元信息持久化在 BDBJE 中。所有被创建的文件,其元信息和文件内容都会常驻于 FE 的内存中。如果 FE 宕机重启,也会从 BDBJE 中加载元信息和文件内容到内存中。当文件被删除时,会直接从 FE 内存中删除相关信息,同时也从 BDBJE 中删除持久化的信息。 - -### 文件的使用 - -如果是 FE 端需要使用创建的文件,则 SmallFileMgr 会直接将 FE 内存中的数据保存为本地文件,存储在指定的目录中,并返回本地的文件路径供使用。 - -如果是 BE 端需要使用创建的文件,BE 会通过 FE 的 http 接口 `/api/get_small_file` 将文件内容下载到 BE 上指定的目录中,供使用。同时,BE 也会在内存中记录当前已经下载过的文件的信息。当 BE 请求一个文件时,会先查看本地文件是否存在并校验。如果校验通过,则直接返回本地文件路径。如果校验失败,则会删除本地文件,重新从 FE 下载。当 BE 重启时,会预先加载本地的文件到内存中。 - -## 使用限制 - -因为文件元信息和内容都存储于 FE 的内存中。所以默认仅支持上传大小在 1MB 以内的文件。并且总文件数量限制为 100 个。可以通过下一小节介绍的配置项进行修改。 - -## 相关配置 - -1. FE 配置 - - * `small_file_dir`:用于存放上传文件的路径,默认为 FE 运行目录的 `small_files/` 目录下。 - * `max_small_file_size_bytes`:单个文件大小限制,单位为字节。默认为 1MB。大于该配置的文件创建将会被拒绝。 - * `max_small_file_number`:一个 Doris 集群支持的总文件数量。默认为 100。当创建的文件数超过这个值后,后续的创建将会被拒绝。 - - > 如果需要上传更多文件或提高单个文件的大小限制,可以通过 `ADMIN SET CONFIG` 命令修改 `max_small_file_size_bytes` 和 `max_small_file_number` 参数。但文件数量和大小的增加,会导致 FE 内存使用量的增加。 - -2. 
BE 配置 - - * `small_file_dir`:用于存放从 FE 下载的文件的路径,默认为 BE 运行目录的 `lib/small_files/` 目录下。 diff --git a/docs/zh-CN/administrator-guide/sql-mode.md b/docs/zh-CN/administrator-guide/sql-mode.md deleted file mode 100644 index c2ebca85f6..0000000000 --- a/docs/zh-CN/administrator-guide/sql-mode.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -{ - "title": "SQL MODE", - "language": "zh-CN" -} ---- - - - -# SQL MODE - -Doris新支持的sql mode参照了 Mysql 的sql mode管理机制,每个客户端都能设置自己的sql mode,拥有Admin权限的数据库管理员可以设置全局sql mode。 - -## sql mode 介绍 - -sql mode使用户能在不同风格的sql语法和数据校验严格度间做切换,使Doris对其他数据库有更好的兼容性。例如在一些数据库里,'||'符号是一个字符串连接符,但在Doris里却是与'or'等价的,这时用户只需要使用sql mode切换到自己想要的风格。每个客户端都能设置sql mode,并在当前对话中有效,只有拥有Admin权限的用户可以设置全局sql mode。 - -## 原理 - -sql mode用一个64位的Long型存储在SessionVariables中,这个地址的每一位都代表一个mode的开启/禁用(1表示开启,0表示禁用)状态,只要知道每一种mode具体是在哪一位,我们就可以通过位运算方便快速的对sql mode进行校验和操作。 - -每一次对sql mode的查询,都会对此Long型进行一次解析,变成用户可读的字符串形式,同理,用户发送给服务器的sql mode字符串,会被解析成能够存储在SessionVariables中的Long型。 - -已被设置好的全局sql mode会被持久化,因此对全局sql mode的操作总是只需一次,即使程序重启后仍可以恢复上一次的全局sql mode。 - -## 操作方式 - -1、设置sql mode - -``` -set global sql_mode = "" -set session sql_mode = "" -``` ->目前Doris的默认sql mode为空。 ->设置global sql mode需要Admin权限,并会影响所有在此后连接的客户端。 ->设置session sql mode只会影响当前对话客户端,默认为session方式。 - -2、查询sql mode - -``` -select @@global.sql_mode -select @@session.sql_mode -``` ->除了这种方式,你还可以通过下面方式返回所有session variables来查看当前sql mode - -``` -show global variables -show session variables -``` - -## 已支持mode - -1. `PIPES_AS_CONCAT` - - 在此模式下,'||'符号是一种字符串连接符号(同CONCAT()函数),而不是'OR'符号的同义词。(e.g., `'a'||'b' = 'ab'`, `1||0 = '10'`) - -## 复合mode - -(后续补充) \ No newline at end of file diff --git a/docs/zh-CN/administrator-guide/time-zone.md b/docs/zh-CN/administrator-guide/time-zone.md deleted file mode 100644 index 20e784f4d5..0000000000 --- a/docs/zh-CN/administrator-guide/time-zone.md +++ /dev/null @@ -1,91 +0,0 @@ ---- -{ - "title": "时区", - "language": "zh-CN" -} ---- - - - -# 时区 - -Doris 支持多时区设置 - -## 名词解释 - -* FE:Frontend,Doris 的前端节点。负责元数据管理和请求接入。 -* BE:Backend,Doris 的后端节点。负责查询执行和数据存储。 - -## 基本概念 - -Doris 内部存在多个时区相关参数 - -* system_time_zone : - 当服务器启动时,会根据机器设置时区自动设置,设置后不可修改。 - -* time_zone : - 服务器当前时区,区分session级别和global级别 - -## 具体操作 - -1. show variables like '%time_zone%' - - 查看当前时区相关配置 - -2. SET time_zone = 'Asia/Shanghai' - - 该命令可以设置session级别的时区,连接断开后失效 - -3. 
SET global time_zone = 'Asia/Shanghai' - - 该命令可以设置global级别的时区参数,fe会将参数持久化,连接断开后不失效 - -### 时区的影响 - -时区设置会影响对时区敏感的时间值的显示和存储。 - -包括NOW()或CURTIME()等时间函数显示的值,也包括show load, show backends中的时间值。 - -但不会影响 create table 中时间类型分区列的 less than 值,也不会影响存储为 date/datetime 类型的值的显示。 - -受时区影响的函数: - -* `FROM_UNIXTIME`:给定一个 UTC 时间戳,返回指定时区的日期时间:如 `FROM_UNIXTIME(0)`, 返回 CST 时区:`1970-01-01 08:00:00`。 -* `UNIX_TIMESTAMP`:给定一个指定时区日期时间,返回 UTC 时间戳:如 CST 时区 `UNIX_TIMESTAMP('1970-01-01 08:00:00')`,返回 `0`。 -* `CURTIME`:返回指定时区时间。 -* `NOW`:返指定地时区日期时间。 -* `CONVERT_TZ`:将一个日期时间从一个指定时区转换到另一个指定时区。 - -## 使用限制 - -时区值可以使用几种格式给出,不区分大小写: - -* 表示UTC偏移量的字符串,如'+10:00'或'-6:00' - -* 标准时区格式,如"Asia/Shanghai"、"America/Los_Angeles" - -* 不支持缩写时区格式,如"MET"、"CTT"。因为缩写时区在不同场景下存在歧义,不建议使用。 - -* 为了兼容Doris,支持CST缩写时区,内部会将CST转移为"Asia/Shanghai"的中国标准时区 - -## 时区格式列表 - -[List of tz database time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) - diff --git a/docs/zh-CN/administrator-guide/update.md b/docs/zh-CN/administrator-guide/update.md deleted file mode 100644 index c994c8a6c7..0000000000 --- a/docs/zh-CN/administrator-guide/update.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -{ - "title": "更新", - "language": "zh-CN" -} ---- - - - -# 更新 - -如果我们需要修改或更新 Doris 中的数据,就可以使用 UPDATE 命令来操作。 - -## 适用场景 - -+ 对满足某些条件的行,修改他的取值。 -+ 点更新,小范围更新,待更新的行最好是整个表的非常小一部分。 -+ update 命令只能在 Unique 数据模型的表中操作。 - -## 名词解释 - -1. Unique 模型:Doris 系统中的一种数据模型。将列分为两类,Key 和 Value。当用户导入相同 Key 的行时,后者的 Value 会覆盖已有的 Value。与 Mysql 中的 Unique 含义一致。 - -## 基本原理 - -利用查询引擎自身的 where 过滤逻辑,从待更新表中筛选出需要被更新的行。再利用 Unique 模型自带的 Value 列新数据替换旧数据的逻辑,将待更新的行变更后,再重新插入到表中。从而实现行级别更新。 - -举例说明 - -假设 Doris 中存在一张订单表,其中 订单id 是 Key 列,订单状态,订单金额是 Value 列。数据状态如下: - -|订单id | 订单金额| 订单状态| -|---|---|---| -| 1 | 100| 待付款 | - -这时候,用户点击付款后,Doris 系统需要将订单id 为 '1' 的订单状态变更为 '待发货', 就需要用到 Update 功能。 - -``` -UPDATE order SET 订单状态='待发货' WHERE 订单id=1; -``` - -用户执行 UPDATE 命令后,系统会进行如下三步: - -+ 第一步:读取满足 WHERE 订单id=1 的行 - (1,100,'待付款') -+ 第二步:变更该行的订单状态,从'待付款'改为'待发货' - (1,100,'待发货') -+ 第三步:将更新后的行再插入回表中,从而达到更新的效果。 - |订单id | 订单金额| 订单状态| - |---|---|---| - | 1 | 100| 待付款 | - | 1 | 100 | 待发货 | - 由于表 order 是 UNIQUE 模型,所以相同 Key 的行,之后后者才会生效,所以最终效果如下: - |订单id | 订单金额| 订单状态| - |---|---|---| - | 1 | 100 | 待发货 | - -## 基本操作 - -### UPDATE 语法 - -```UPDATE table_name SET value=xxx WHERE condition;``` - -+ `table_name`: 待更新的表,必须是 UNIQUE 模型的表才能进行更新。 - -+ value=xxx: 待更新的列,等式左边必须是表的 value 列。等式右边可以是常量,也可以是某个表中某列的表达式变换。 - 比如 value = 1, 则待更新的列值会变为1。 - 比如 value = value +1, 则待更新的列值会自增1。 - -+ condition:只有满足 condition 的行才会被更新。condition 必须是一个结果为 Boolean 类型的表达式。 - 比如 k1 = 1, 则只有当 k1 列值为1的行才会被更新。 - 比如 k1 = k2, 则只有 k1 列值和 k2 列一样的行才会被更新。 - 不支持不填写condition,也就是不支持全表更新。 - -### 同步 - -Update 语法在 Doris 中是一个同步语法,既 Update 语句成功,更新就成功了,数据可见。 - -### 性能 - -Update 语句的性能和待更新的行数,以及 condition 的检索效率密切相关。 - -+ 待更新的行数:待更新的行数越多,Update 语句的速度就会越慢。这和导入的原理是一致的。 - Doris 的更新比较合适偶发更新的场景,比如修改个别行的值。 - Doris 并不适合大批量的修改数据。大批量修改会使得 Update 语句运行时间很久。 - -+ condition 的检索效率:Doris 的 Update 实现原理是先将满足 condition 的行读取处理,所以如果 condition 的检索效率高,则 Update 的速度也会快。 - condition 列最好能命中,索引或者分区分桶裁剪。这样 Doris 就不需要扫全表,可以快速定位到需要更新的行。从而提升更新效率。 - 强烈不推荐 condition 列中包含 UNIQUE 模型的 value 列。 - -### 并发控制 - -默认情况下,并不允许同一时间对同一张表并发进行多个 Update 操作。 - -主要原因是,Doris 目前支持的是行更新,这意味着,即使用户声明的是 ```SET v2 = 1```,实际上,其他所有的 Value 列也会被覆盖一遍(尽管值没有变化)。 - -这就会存在一个问题,如果同时有两个 Update 操作对同一行进行更新,那么其行为可能是不确定的。也就是可能存在脏数据。 - -但在实际应用中,如果用户自己可以保证即使并发更新,也不会同时对同一行进行操作的话,就可以手动打开并发限制。通过修改 FE 配置 ```enable_concurrent_update```。当配置值为 true 时,则对更新并发无限制。 - -## 使用风险 - -由于 Doris 目前支持的是行更新,并且采用的是读取后再写入的两步操作,则如果 Update 语句和其他导入或 Delete 语句刚好修改的是同一行时,存在不确定的数据结果。 - 
-所以用户在使用的时候,一定要注意*用户侧自己*进行 Update 语句和其他 DML 语句的并发控制。 - -## 版本 - -Doris Version 0.15.x + diff --git a/docs/zh-CN/administrator-guide/variables.md b/docs/zh-CN/administrator-guide/variables.md deleted file mode 100644 index 219e86049e..0000000000 --- a/docs/zh-CN/administrator-guide/variables.md +++ /dev/null @@ -1,492 +0,0 @@ ---- -{ - "title": "变量", - "language": "zh-CN" -} ---- - - - -# 变量 - -本文档主要介绍当前支持的变量(variables)。 - -Doris 中的变量参考 MySQL 中的变量设置。但部分变量仅用于兼容一些 MySQL 客户端协议,并不产生其在 MySQL 数据库中的实际意义。 - -## 变量设置与查看 - -### 查看 - -可以通过 `SHOW VARIABLES [LIKE 'xxx'];` 查看所有或指定的变量。如: - -``` -SHOW VARIABLES; -SHOW VARIABLES LIKE '%time_zone%'; -``` - -### 设置 - -部分变量可以设置全局生效或仅当前会话生效。设置全局生效后,后续新的会话连接中会沿用设置值。而设置仅当前会话生效,则变量仅对当前会话产生作用。 - -仅当前会话生效,通过 `SET var_name=xxx;` 语句来设置。如: - -``` -SET exec_mem_limit = 137438953472; -SET forward_to_master = true; -SET time_zone = "Asia/Shanghai"; -``` - -全局生效,通过 `SET GLOBAL var_name=xxx;` 设置。如: - -``` -SET GLOBAL exec_mem_limit = 137438953472 -``` - -> 注1:只有 ADMIN 用户可以设置变量的全局生效。 -> 注2:全局生效的变量不影响当前会话的变量值,仅影响新的会话中的变量。 - -既支持当前会话生效又支持全局生效的变量包括: - -* `time_zone` -* `wait_timeout` -* `sql_mode` -* `enable_profile` -* `query_timeout` -* `exec_mem_limit` -* `batch_size` -* `allow_partition_column_nullable` -* `insert_visible_timeout_ms` -* `enable_fold_constant_by_be` - -只支持全局生效的变量包括: - -* `default_rowset_type` - -同时,变量设置也支持常量表达式。如: - -``` -SET exec_mem_limit = 10 * 1024 * 1024 * 1024; -SET forward_to_master = concat('tr', 'u', 'e'); -``` - -### 在查询语句中设置变量 - -在一些场景中,我们可能需要对某些查询有针对性的设置变量。 -通过使用SET_VAR提示可以在查询中设置会话变量(在单个语句内生效)。例子: - -``` -SELECT /*+ SET_VAR(exec_mem_limit = 8589934592) */ name FROM people ORDER BY name; -SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3); -``` - -注意注释必须以/*+ 开头,并且只能跟随在SELECT之后。 - -## 支持的变量 - -* `SQL_AUTO_IS_NULL` - - 用于兼容 JDBC 连接池 C3P0。 无实际作用。 - -* `auto_increment_increment` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `autocommit` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `batch_size` - - 用于指定在查询执行过程中,各个节点传输的单个数据包的行数。默认一个数据包的行数为 1024 行,即源端节点每产生 1024 行数据后,打包发给目的节点。 - - 较大的行数,会在扫描大数据量场景下提升查询的吞吐,但可能会在小查询场景下增加查询延迟。同时,也会增加查询的内存开销。建议设置范围 1024 至 4096。 - -* `character_set_client` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `character_set_connection` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `character_set_results` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `character_set_server` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `codegen_level` - - 用于设置 LLVM codegen 的等级。(当前未生效)。 - -* `collation_connection` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `collation_database` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `collation_server` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `delete_without_partition` - - 设置为 true 时。当使用 delete 命令删除分区表数据时,可以不指定分区。delete 操作将会自动应用到所有分区。 - - 但注意,自动应用到所有分区可能到导致 delete 命令耗时触发大量子任务导致耗时较长。如无必要,不建议开启。 - -* `disable_colocate_join` - - 控制是否启用 [Colocation Join](./colocation-join.md) 功能。默认为 false,表示启用该功能。true 表示禁用该功能。当该功能被禁用后,查询规划将不会尝试执行 Colocation Join。 - -* `enable_bucket_shuffle_join` - - 控制是否启用 [Bucket Shuffle Join](./bucket-shuffle-join.md) 功能。默认为 true,表示启用该功能。false 表示禁用该功能。当该功能被禁用后,查询规划将不会尝试执行 Bucket Shuffle Join。 - -* `disable_streaming_preaggregations` - - 控制是否开启流式预聚合。默认为 false,即开启。当前不可设置,且默认开启。 - -* `enable_insert_strict` - - 用于设置通过 INSERT 语句进行数据导入时,是否开启 `strict` 模式。默认为 false,即不开启 `strict` 模式。关于该模式的介绍,可以参阅 [这里](./load-data/insert-into-manual.md)。 - -* `enable_spilling` - - 用于设置是否开启大数据量落盘排序。默认为 false,即关闭该功能。当用户未指定 ORDER BY 子句的 LIMIT 条件,同时设置 `enable_spilling` 为 true 时,才会开启落盘排序。该功能启用后,会使用 BE 数据目录下 `doris-scratch/` 目录存放临时的落盘数据,并在查询结束后,清空临时数据。 - - 该功能主要用于使用有限的内存进行大数据量的排序操作。 - - 注意,该功能为实验性质,不保证稳定性,请谨慎开启。 - -* 
`exec_mem_limit` - - 用于设置单个查询的内存限制。默认为 2GB,单位为B/K/KB/M/MB/G/GB/T/TB/P/PB, 默认为B。 - - 该参数用于限制一个查询计划中,单个查询计划的实例所能使用的内存。一个查询计划可能有多个实例,一个 BE 节点可能执行一个或多个实例。所以该参数并不能准确限制一个查询在整个集群的内存使用,也不能准确限制一个查询在单一 BE 节点上的内存使用。具体需要根据生成的查询计划判断。 - - 通常只有在一些阻塞节点(如排序节点、聚合节点、Join 节点)上才会消耗较多的内存,而其他节点(如扫描节点)中,数据为流式通过,并不会占用较多的内存。 - - 当出现 `Memory Exceed Limit` 错误时,可以尝试指数级增加该参数,如 4G、8G、16G 等。 - -* `forward_to_master` - - 用户设置是否将一些show 类命令转发到 Master FE 节点执行。默认为 `true`,即转发。Doris 中存在多个 FE 节点,其中一个为 Master 节点。通常用户可以连接任意 FE 节点进行全功能操作。但部分信息查看指令,只有从 Master FE 节点才能获取详细信息。 - - 如 `SHOW BACKENDS;` 命令,如果不转发到 Master FE 节点,则仅能看到节点是否存活等一些基本信息,而转发到 Master FE 则可以获取包括节点启动时间、最后一次心跳时间等更详细的信息。 - - 当前受该参数影响的命令如下: - - 1. `SHOW FRONTENDS;` - - 转发到 Master 可以查看最后一次心跳信息。 - - 2. `SHOW BACKENDS;` - - 转发到 Master 可以查看启动时间、最后一次心跳信息、磁盘容量信息。 - - 3. `SHOW BROKER;` - - 转发到 Master 可以查看启动时间、最后一次心跳信息。 - - 4. `SHOW TABLET;`/`ADMIN SHOW REPLICA DISTRIBUTION;`/`ADMIN SHOW REPLICA STATUS;` - - 转发到 Master 可以查看 Master FE 元数据中存储的 tablet 信息。正常情况下,不同 FE 元数据中 tablet 信息应该是一致的。当出现问题时,可以通过这个方法比较当前 FE 和 Master FE 元数据的差异。 - - 5. `SHOW PROC;` - - 转发到 Master 可以查看 Master FE 元数据中存储的相关 PROC 的信息。主要用于元数据比对。 - -* `init_connect` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `interactive_timeout` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `enable_profile` - - 用于设置是否需要查看查询的 profile。默认为 false,即不需要 profile。 - - 默认情况下,只有在查询发生错误时,BE 才会发送 profile 给 FE,用于查看错误。正常结束的查询不会发送 profile。发送 profile 会产生一定的网络开销,对高并发查询场景不利。 - 当用户希望对一个查询的 profile 进行分析时,可以将这个变量设为 true 后,发送查询。查询结束后,可以通过在当前连接的 FE 的 web 页面查看到 profile: - - `fe_host:fe_http_port/query` - - 其中会显示最近100条,开启 `enable_profile` 的查询的 profile。 - -* `language` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `license` - - 显示 Doris 的 License。无其他作用。 - -* `load_mem_limit` - - 用于指定导入操作的内存限制。默认为 0,即表示不使用该变量,而采用 `exec_mem_limit` 作为导入操作的内存限制。 - - 这个变量仅用于 INSERT 操作。因为 INSERT 操作设计查询和导入两个部分,如果用户不设置此变量,则查询和导入操作各自的内存限制均为 `exec_mem_limit`。否则,INSERT 的查询部分内存限制为 `exec_mem_limit`,而导入部分限制为 `load_mem_limit`。 - - 其他导入方式,如 BROKER LOAD,STREAM LOAD 的内存限制依然使用 `exec_mem_limit`。 - -* `lower_case_table_names` - - 用于控制用户表表名大小写是否敏感。 - - 值为 0 时,表名大小写敏感。默认为0。 - - 值为 1 时,表名大小写不敏感,doris在存储和查询时会将表名转换为小写。 - 优点是在一条语句中可以使用表名的任意大小写形式,下面的sql是正确的: - ``` - mysql> show tables; - +------------------+ - | Tables_in_testdb | - +------------------+ - | cost | - +------------------+ - - mysql> select * from COST where COst.id < 100 order by cost.id; - ``` - 缺点是建表后无法获得建表语句中指定的表名,`show tables` 查看的表名为指定表名的小写。 - - 值为 2 时,表名大小写不敏感,doris存储建表语句中指定的表名,查询时转换为小写进行比较。 - 优点是`show tables` 查看的表名为建表语句中指定的表名; - 缺点是同一语句中只能使用表名的一种大小写形式,例如对`cost` 表使用表名 `COST` 进行查询: - ``` - mysql> select * from COST where COST.id < 100 order by COST.id; - ``` - - 该变量兼容MySQL。需在集群初始化时通过fe.conf 指定 `lower_case_table_names=`进行配置,集群初始化完成后无法通过`set` 语句修改该变量,也无法通过重启、升级集群修改该变量。 - - information_schema中的系统视图表名不区分大小写,当`lower_case_table_names`值为 0 时,表现为 2。 - -* `max_allowed_packet` - - 用于兼容 JDBC 连接池 C3P0。 无实际作用。 - -* `max_pushdown_conditions_per_column` - - 该变量的具体含义请参阅 [BE 配置项](./config/be_config.md) 中 `max_pushdown_conditions_per_column` 的说明。该变量默认置为 -1,表示使用 `be.conf` 中的配置值。如果设置大于 0,则当前会话中的查询会使用该变量值,而忽略 `be.conf` 中的配置值。 - -* `max_scan_key_num` - - 该变量的具体含义请参阅 [BE 配置项](./config/be_config.md) 中 `doris_max_scan_key_num` 的说明。该变量默认置为 -1,表示使用 `be.conf` 中的配置值。如果设置大于 0,则当前会话中的查询会使用该变量值,而忽略 `be.conf` 中的配置值。 - -* `net_buffer_length` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `net_read_timeout` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `net_write_timeout` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `parallel_exchange_instance_num` - - 用于设置执行计划中,一个上层节点接收下层节点数据所使用的 exchange node 数量。默认为 -1,即表示 exchange node 
数量等于下层节点执行实例的个数(默认行为)。当设置大于0,并且小于下层节点执行实例的个数,则 exchange node 数量等于设置值。 - - 在一个分布式的查询执行计划中,上层节点通常有一个或多个 exchange node 用于接收来自下层节点在不同 BE 上的执行实例的数据。通常 exchange node 数量等于下层节点执行实例数量。 - - 在一些聚合查询场景下,如果底层需要扫描的数据量较大,但聚合之后的数据量很小,则可以尝试修改此变量为一个较小的值,可以降低此类查询的资源开销。如在 DUPLICATE KEY 明细模型上进行聚合查询的场景。 - -* `parallel_fragment_exec_instance_num` - - 针对扫描节点,设置其在每个 BE 节点上,执行实例的个数。默认为 1。 - - 一个查询计划通常会产生一组 scan range,即需要扫描的数据范围。这些数据分布在多个 BE 节点上。一个 BE 节点会有一个或多个 scan range。默认情况下,每个 BE 节点的一组 scan range 只由一个执行实例处理。当机器资源比较充裕时,可以将增加该变量,让更多的执行实例同时处理一组 scan range,从而提升查询效率。 - - 而 scan 实例的数量决定了上层其他执行节点,如聚合节点,join 节点的数量。因此相当于增加了整个查询计划执行的并发度。修改该参数会对大查询效率提升有帮助,但较大数值会消耗更多的机器资源,如CPU、内存、磁盘IO。 - -* `query_cache_size` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `query_cache_type` - - 用于兼容 JDBC 连接池 C3P0。 无实际作用。 - -* `query_timeout` - - 用于设置查询超时。该变量会作用于当前连接中所有的查询语句,以及 INSERT 语句。默认为 5 分钟,单位为秒。 - -* `resource_group` - - 暂不使用。 -* `send_batch_parallelism` - - 用于设置执行 InsertStmt 操作时发送批处理数据的默认并行度,如果并行度的值超过 BE 配置中的 `max_send_batch_parallelism_per_job`,那么作为协调点的 BE 将使用 `max_send_batch_parallelism_per_job` 的值。 - -* `sql_mode` - - 用于指定 SQL 模式,以适应某些 SQL 方言。关于 SQL 模式,可参阅 [这里](./sql-mode.md)。 - -* `sql_safe_updates` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `sql_select_limit` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `system_time_zone` - - 显示当前系统时区。不可更改。 - -* `time_zone` - - 用于设置当前会话的时区。时区会对某些时间函数的结果产生影响。关于时区,可以参阅 [这里](./time-zone.md)。 - -* `tx_isolation` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `tx_read_only` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `transaction_read_only` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `transaction_isolation` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `version` - - 用于兼容 MySQL 客户端。无实际作用。 - -* `performance_schema` - - 用于兼容 8.0.16及以上版本的MySQL JDBC。无实际作用。 - -* `version_comment` - - 用于显示 Doris 的版本。不可更改。 - -* `wait_timeout` - - 用于设置空闲连接的连接时长。当一个空闲连接在该时长内与 Doris 没有任何交互,则 Doris 会主动断开这个链接。默认为 8 小时,单位为秒。 - -* `default_rowset_type` - - 用于设置计算节点存储引擎默认的存储格式。当前支持的存储格式包括:alpha/beta。 - -* `use_v2_rollup` - - 用于控制查询使用segment v2存储格式的rollup索引获取数据。该变量用于上线segment v2的时候,进行验证使用;其他情况,不建议使用。 - -* `rewrite_count_distinct_to_bitmap_hll` - - 是否将 bitmap 和 hll 类型的 count distinct 查询重写为 bitmap_union_count 和 hll_union_agg 。 - -* `prefer_join_method` - - 在选择join的具体实现方式是broadcast join还是shuffle join时,如果broadcast join cost和shuffle join cost相等时,优先选择哪种join方式。 - - 目前该变量的可选值为"broadcast" 或者 "shuffle"。 - -* `allow_partition_column_nullable` - - 建表时是否允许分区列为NULL。默认为true,表示允许为NULL。false 表示分区列必须被定义为NOT NULL - -* `insert_visible_timeout_ms` - - 在执行insert语句时,导入动作(查询和插入)完成后,还需要等待事务提交,使数据可见。此参数控制等待数据可见的超时时间,默认为10000,最小为1000。 - -* `enable_exchange_node_parallel_merge` - - 在一个排序的查询之中,一个上层节点接收下层节点有序数据时,会在exchange node上进行对应的排序来保证最终的数据是有序的。但是单线程进行多路数据归并时,如果数据量过大,会导致exchange node的单点的归并瓶颈。 - - Doris在这部分进行了优化处理,如果下层的数据节点过多。exchange node会启动多线程进行并行归并来加速排序过程。该参数默认为False,即表示 exchange node 不采取并行的归并排序,来减少额外的CPU和内存消耗。 - -* `extract_wide_range_expr` - - 用于控制是否开启 「宽泛公因式提取」的优化。取值有两种:true 和 false 。默认情况下开启。 - -* `enable_fold_constant_by_be` - - 用于控制常量折叠的计算方式。默认是 `false`,即在 `FE` 进行计算;若设置为 `true`,则通过 `RPC` 请求经 `BE` 计算。 - -* `cpu_resource_limit` - - 用于限制一个查询的资源开销。这是一个实验性质的功能。目前的实现是限制一个查询在单个节点上的scan线程数量。限制了scan线程数,从底层返回的数据速度变慢,从而限制了查询整体的计算资源开销。假设设置为 2,则一个查询在单节点上最多使用2个scan线程。 - - 该参数会覆盖 `parallel_fragment_exec_instance_num` 的效果。即假设 `parallel_fragment_exec_instance_num` 设置为4,而该参数设置为2。则单个节点上的4个执行实例会共享最多2个扫描线程。 - - 该参数会被 user property 中的 `cpu_resource_limit` 配置覆盖。 - - 默认 -1,即不限制。 - -* `disable_join_reorder` - - 用于关闭所有系统自动的 join reorder 算法。取值有两种:true 和 false。默认行况下关闭,也就是采用系统自动的 join reorder 算法。设置为 true 后,系统会关闭所有自动排序的算法,采用 SQL 原始的表顺序,执行 join - -* `return_object_data_as_binary` 
- 用于标识是否在select 结果中返回bitmap/hll 结果。在 select into outfile 语句中,如果导出文件格式为csv 则会将 bimap/hll 数据进行base64编码,如果是parquet 文件格式 将会把数据作为byte array 存储 -* `block_encryption_mode` - 可以通过block_encryption_mode参数,控制块加密模式,默认值为:空。当使用AES算法加密时相当于`AES_128_ECB`, 当时用SM3算法加密时相当于`SM3_128_ECB` - 可选值: -``` - AES_128_ECB, - AES_192_ECB, - AES_256_ECB, - AES_128_CBC, - AES_192_CBC, - AES_256_CBC, - AES_128_CFB, - AES_192_CFB, - AES_256_CFB, - AES_128_CFB1, - AES_192_CFB1, - AES_256_CFB1, - AES_128_CFB8, - AES_192_CFB8, - AES_256_CFB8, - AES_128_CFB128, - AES_192_CFB128, - AES_256_CFB128, - AES_128_CTR, - AES_192_CTR, - AES_256_CTR, - AES_128_OFB, - AES_192_OFB, - AES_256_OFB, - SM4_128_ECB, - SM4_128_CBC, - SM4_128_CFB128, - SM4_128_OFB, - SM4_128_CTR, -``` - -* `enable_infer_predicate` - - 用于控制是否进行谓词推导。取值有两种:true 和 false。默认情况下关闭,系统不在进行谓词推导,采用原始的谓词进行相关操作。设置为 true 后,进行谓词扩展。 - diff --git a/new-docs/zh-CN/advanced/alter-table/replace-table.md b/docs/zh-CN/advanced/alter-table/replace-table.md similarity index 100% rename from new-docs/zh-CN/advanced/alter-table/replace-table.md rename to docs/zh-CN/advanced/alter-table/replace-table.md diff --git a/new-docs/zh-CN/advanced/alter-table/schema-change.md b/docs/zh-CN/advanced/alter-table/schema-change.md similarity index 100% rename from new-docs/zh-CN/advanced/alter-table/schema-change.md rename to docs/zh-CN/advanced/alter-table/schema-change.md diff --git a/new-docs/zh-CN/advanced/best-practice/debug-log.md b/docs/zh-CN/advanced/best-practice/debug-log.md similarity index 100% rename from new-docs/zh-CN/advanced/best-practice/debug-log.md rename to docs/zh-CN/advanced/best-practice/debug-log.md diff --git a/new-docs/zh-CN/advanced/best-practice/import-analysis.md b/docs/zh-CN/advanced/best-practice/import-analysis.md similarity index 100% rename from new-docs/zh-CN/advanced/best-practice/import-analysis.md rename to docs/zh-CN/advanced/best-practice/import-analysis.md diff --git a/new-docs/zh-CN/advanced/best-practice/query-analysis.md b/docs/zh-CN/advanced/best-practice/query-analysis.md similarity index 100% rename from new-docs/zh-CN/advanced/best-practice/query-analysis.md rename to docs/zh-CN/advanced/best-practice/query-analysis.md diff --git a/new-docs/zh-CN/advanced/broker.md b/docs/zh-CN/advanced/broker.md similarity index 100% rename from new-docs/zh-CN/advanced/broker.md rename to docs/zh-CN/advanced/broker.md diff --git a/new-docs/zh-CN/advanced/cache/partition-cache.md b/docs/zh-CN/advanced/cache/partition-cache.md similarity index 100% rename from new-docs/zh-CN/advanced/cache/partition-cache.md rename to docs/zh-CN/advanced/cache/partition-cache.md diff --git a/new-docs/zh-CN/advanced/join-optimization/bucket-shuffle-join.md b/docs/zh-CN/advanced/join-optimization/bucket-shuffle-join.md similarity index 100% rename from new-docs/zh-CN/advanced/join-optimization/bucket-shuffle-join.md rename to docs/zh-CN/advanced/join-optimization/bucket-shuffle-join.md diff --git a/new-docs/zh-CN/advanced/join-optimization/colocation-join.md b/docs/zh-CN/advanced/join-optimization/colocation-join.md similarity index 100% rename from new-docs/zh-CN/advanced/join-optimization/colocation-join.md rename to docs/zh-CN/advanced/join-optimization/colocation-join.md diff --git a/new-docs/zh-CN/advanced/join-optimization/runtime-filter.md b/docs/zh-CN/advanced/join-optimization/runtime-filter.md similarity index 100% rename from new-docs/zh-CN/advanced/join-optimization/runtime-filter.md rename to docs/zh-CN/advanced/join-optimization/runtime-filter.md diff --git 
a/new-docs/zh-CN/advanced/materialized-view.md b/docs/zh-CN/advanced/materialized-view.md similarity index 100% rename from new-docs/zh-CN/advanced/materialized-view.md rename to docs/zh-CN/advanced/materialized-view.md diff --git a/new-docs/zh-CN/advanced/orthogonal-bitmap-manual.md b/docs/zh-CN/advanced/orthogonal-bitmap-manual.md similarity index 100% rename from new-docs/zh-CN/advanced/orthogonal-bitmap-manual.md rename to docs/zh-CN/advanced/orthogonal-bitmap-manual.md diff --git a/new-docs/zh-CN/advanced/partition/dynamic-partition.md b/docs/zh-CN/advanced/partition/dynamic-partition.md similarity index 100% rename from new-docs/zh-CN/advanced/partition/dynamic-partition.md rename to docs/zh-CN/advanced/partition/dynamic-partition.md diff --git a/new-docs/zh-CN/advanced/partition/table-tmp-partition.md b/docs/zh-CN/advanced/partition/table-tmp-partition.md similarity index 100% rename from new-docs/zh-CN/advanced/partition/table-tmp-partition.md rename to docs/zh-CN/advanced/partition/table-tmp-partition.md diff --git a/new-docs/zh-CN/advanced/resource.md b/docs/zh-CN/advanced/resource.md similarity index 100% rename from new-docs/zh-CN/advanced/resource.md rename to docs/zh-CN/advanced/resource.md diff --git a/new-docs/zh-CN/advanced/small-file-mgr.md b/docs/zh-CN/advanced/small-file-mgr.md similarity index 100% rename from new-docs/zh-CN/advanced/small-file-mgr.md rename to docs/zh-CN/advanced/small-file-mgr.md diff --git a/new-docs/zh-CN/advanced/time-zone.md b/docs/zh-CN/advanced/time-zone.md similarity index 100% rename from new-docs/zh-CN/advanced/time-zone.md rename to docs/zh-CN/advanced/time-zone.md diff --git a/new-docs/zh-CN/advanced/variables.md b/docs/zh-CN/advanced/variables.md similarity index 100% rename from new-docs/zh-CN/advanced/variables.md rename to docs/zh-CN/advanced/variables.md diff --git a/docs/zh-CN/administrator-guide/vectorized-execution-engine.md b/docs/zh-CN/advanced/vectorized-execution-engine.md similarity index 100% rename from docs/zh-CN/administrator-guide/vectorized-execution-engine.md rename to docs/zh-CN/advanced/vectorized-execution-engine.md diff --git a/docs/zh-CN/benchmark/samples.md b/docs/zh-CN/benchmark/samples.md deleted file mode 100644 index 8ee915936c..0000000000 --- a/docs/zh-CN/benchmark/samples.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -{ - "title": "使用示例", - "language": "zh-CN" -} ---- - - - -# 使用示例 - -Doris 代码库中提供了丰富的使用示例,能够帮助 Doris 用户快速上手体验 Doris 的功能。 - -## 示例说明 - -示例代码都存放在 Doris 代码库的 [`samples/`](https://github.com/apache/incubator-doris/tree/master/samples) 目录下。 - -``` -. 
-├── connect -├── doris-demo -├── insert -└── mini_load -``` - -* `connect/` - - 该目录下主要展示了各个程序语言连接 Doris 的代码示例。 - -* `doris-demo/` - - 该目下主要以 Maven 工程的形式,展示了 Doris 多个功能的代码示例。如 spark-connector 和 flink-connector 的使用示例、与 Spring 框架集成的示例、Stream Load 导入示例等等。 - -* `insert/` - - 该目录展示了通过 python 或 shell 脚本调用 Doris 的 Insert 命令导入数据的一些代码示例。 - -* `miniload/` - - 该目录展示了通过 python 调用 mini load 进行数据导入的代码示例。但因为 mini load 功能已由 stream load 功能代替,建议使用 stream load 功能进行数据导入。 \ No newline at end of file diff --git a/new-docs/zh-CN/benchmark/ssb.md b/docs/zh-CN/benchmark/ssb.md similarity index 100% rename from new-docs/zh-CN/benchmark/ssb.md rename to docs/zh-CN/benchmark/ssb.md diff --git a/docs/zh-CN/benchmark/star-schema-benchmark.md b/docs/zh-CN/benchmark/star-schema-benchmark.md deleted file mode 100644 index 9d9b1b405f..0000000000 --- a/docs/zh-CN/benchmark/star-schema-benchmark.md +++ /dev/null @@ -1,182 +0,0 @@ ---- -{ - "title": "Star-Schema-Benchmark 测试", - "language": "zh-CN" -} ---- - - - -# Star Schema Benchmark - -[Star Schema Benchmark(SSB)](https://www.cs.umb.edu/~poneil/StarSchemaB.PDF) 是一个轻量级的数仓场景下的性能测试集。SSB基于 [TPC-H](http://www.tpc.org/tpch/) 提供了一个简化版的星型模型数据集,主要用于测试在星型模型下,多表关联查询的性能表现。 - -本文档主要介绍如何在 Doris 中通过 SSB 进行初步的性能测试。 - -> 注1:包括 SSB 在内的标准测试集通常和实际业务场景差距较大,并且部分测试会针对测试集进行参数调优。所以标准测试集的测试结果仅能反映数据库在特定场景下的性能表现。建议用户使用实际业务数据进行进一步的测试。 -> -> 注2:本文档涉及的操作都在 CentOS 7 环境进行。 - -## 环境准备 - -请先参照 [官方文档](http://doris.incubator.apache.org/master/zh-CN/installing/install-deploy.html) 进行 Doris 的安装部署,以获得一个正常运行中的 Doris 集群(至少包含 1 FE,1 BE)。 - -以下文档中涉及的脚本都存放在 Doris 代码库的 `tools/ssb-tools/` 下。 - -## 数据准备 - -### 1. 下载安装 SSB 数据生成工具。 - -执行以下脚本下载并编译 [ssb-dbgen](https://github.com/electrum/ssb-dbgen.git) 工具。 - -``` -sh build-ssb-dbgen.sh -``` - -安装成功后,将在 `ssb-dbgen/` 目录下生成 `dbgen` 二进制文件。 - -### 2. 生成 SSB 测试集 - -执行以下脚本生成 SSB 数据集: - -``` -sh gen-ssb-data.sh -s 100 -c 100 -``` - -> 注1:通过 `sh gen-ssb-data.sh -h` 查看脚本帮助。 -> -> 注2:数据会以 `.tbl` 为后缀生成在 `ssb-data/` 目录下。文件总大小约60GB。生成时间可能在数分钟到1小时不等。 -> -> 注3:`-s 100` 表示测试集大小系数为 100,`-c 100` 表示并发100个线程生成 lineorder 表的数据。`-c` 参数也决定了最终 lineorder 表的文件数量。参数越大,文件数越多,每个文件越小。 - -在 `-s 100` 参数下,生成的数据集大小为: - -|Table |Rows |Size | File Number | -|---|---|---|---| -|lineorder| 6亿(600037902) | 60GB | 100| -|customer|300万(3000000) |277M |1| -|part|140万(1400000) | 116M|1| -|supplier|20万(200000) |17M |1| -|date| 2556|228K |1| - -3. 建表 - - 复制 [create-tables.sql](https://github.com/apache/incubator-doris/tree/master/tools/ssb-tools/create-tables.sql) 中的建表语句,在 Doris 中执行。 - -4. 导入数据 - - 0. 准备 'doris-cluster.conf' 文件。 - - 在调用导入脚本前,需要将 FE 的 ip 端口等信息写在 `doris-cluster.conf` 文件中。 - - 文件位置和 `load-dimension-data.sh` 平级。 - - 文件内容包括 FE 的 ip,HTTP 端口,用户名,密码以及待导入数据的 DB 名称: - - ``` - export FE_HOST="xxx" - export FE_HTTP_PORT="8030" - export USER="root" - export PASSWORD='xxx' - export DB="ssb" - ``` - - 1. 导入 4 张维度表数据(customer, part, supplier and date) - - 因为这4张维表数据量较小,导入较简单,我们使用以下命令先导入这4表的数据: - - `sh load-dimension-data.sh` - - 2. 导入事实表 lineorder。 - - 通过以下命令导入 lineorder 表数据: - - `sh load-fact-data.sh -c 5` - - `-c 5` 表示启动 5 个并发线程导入(默认为3)。在单 BE 节点情况下,由 `sh gen-ssb-data.sh -s 100 -c 100` 生成的 lineorder 数据,使用 `sh load-fact-data.sh -c 3` 的导入时间约为 10min。内存开销约为 5-6GB。如果开启更多线程,可以加快导入速度,但会增加额外的内存开销。 - - > 注:为获得更快的导入速度,你可以在 be.conf 中添加 `flush_thread_num_per_store=5` 后重启BE。该配置表示每个数据目录的写盘线程数,默认为2。较大的数据可以提升写数据吞吐,但可能会增加 IO Util。(参考值:1块机械磁盘,在默认为2的情况下,导入过程中的 IO Util 约为12%,设置为5时,IO Util 约为26%。如果是 SSD 盘,则几乎为 0)。 - -5. 
检查导入数据 - - ``` - select count(*) from part; - select count(*) from customer; - select count(*) from supplier; - select count(*) from date; - select count(*) from lineorder; - ``` - - 数据量应和生成数据的行数一致。 - -## 查询测试 - -SSB 测试集共 4 组 14 个 SQL。查询语句在 [queries/](https://github.com/apache/incubator-doris/tree/master/tools/ssb-tools/queries) 目录下。 - -## 测试报告 - -以下测试报告基于 Doris [branch-0.15](https://github.com/apache/incubator-doris/tree/branch-0.15) 分支代码测试,仅供参考。(更新时间:2021年10月25号) - -1. 硬件环境 - - * 1 FE + 1-3 BE 混部 - * CPU:96core, Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz - * 内存:384GB - * 硬盘:1块机械硬盘 - * 网卡:万兆网卡 - -2. 数据集 - - |Table |Rows |Origin Size | Compacted Size(1 Replica) | - |---|---|---|---| - |lineorder| 6亿(600037902) | 60 GB | 14.846 GB | - |customer|300万(3000000) |277 MB | 414.741 MB | - |part|140万(1400000) | 116 MB | 38.277 MB | - |supplier|20万(200000) |17 MB | 27.428 MB | - |date| 2556|228 KB | 275.804 KB | - -3. 测试结果 - - |Query |Time(ms) (1 BE) | Time(ms) (3 BE) | Parallelism | Runtime Filter Mode | - |---|---|---|---|---| - | q1.1 | 200 | 140 | 8 | IN | - | q1.2 | 90 | 80 | 8 | IN | - | q1.3 | 90 | 80 | 8 | IN | - | q2.1 | 1100 | 400 | 8 | BLOOM_FILTER | - | q2.2 | 900 | 330 | 8 | BLOOM_FILTER | - | q2.3 | 790 | 320 | 8 | BLOOM_FILTER | - | q3.1 | 3100 | 1280 | 8 | BLOOM_FILTER | - | q3.2 | 700 | 270 | 8 | BLOOM_FILTER | - | q3.3 | 540 | 270 | 8 | BLOOM_FILTER | - | q3.4 | 560 | 240 | 8 | BLOOM_FILTER | - | q4.1 | 2820 | 1150 | 8 | BLOOM_FILTER | - | q4.2 | 1430 | 670 | 8 | BLOOM_FILTER | - | q4.2 | 1750 | 1030 | 8 | BLOOM_FILTER | - - > 注1:“这个测试集和你的生产环境相去甚远,请对他保持怀疑态度!” - > - > 注2:测试结果为多次执行取平均值(Page Cache 会起到一定加速作用)。并且数据经过充分的 compaction (如果在刚导入数据后立刻测试,则查询延迟可能高于本测试结果) - > - > 注3:因环境受限,本测试使用的硬件规格较高,但整个测试过程中不会消耗如此多的硬件资源。其中内存消耗在 10GB 以内,CPU使用率在 10% 以内。 - > - > 注4:Parallelism 表示查询并发度,通过 `set parallel_fragment_exec_instance_num=8` 设置。 - > - > 注5:Runtime Filter Mode 是 Runtime Filter 的类型,通过 `set runtime_filter_type="BLOOM_FILTER"` 设置。([Runtime Filter](http://doris.incubator.apache.org/master/zh-CN/administrator-guide/runtime-filter.html) 功能对 SSB 测试集效果显著。因为该测试集中,Join 算子右表的数据可以对左表起到很好的过滤作用。你可以尝试通过 `set runtime_filter_mode=off` 关闭该功能,看看查询延迟的变化。) - diff --git a/docs/zh-CN/benchmark/systemd.md b/docs/zh-CN/benchmark/systemd.md deleted file mode 100644 index 21de382bfe..0000000000 --- a/docs/zh-CN/benchmark/systemd.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -{ - "title": "Systemd", - "language": "zh-CN" -} ---- - - - -# Systemd - -Doris 代码库中提供了 Systemd 配置文件,可以帮助用户在 Linux 中方便地控制 Doris 服务的启停。 - -配置文件请前往 [代码库](https://github.com/apache/incubator-doris/tree/master/tools/systemd) 查看。 diff --git a/new-docs/zh-CN/benchmark/tpc-h.md b/docs/zh-CN/benchmark/tpc-h.md similarity index 100% rename from new-docs/zh-CN/benchmark/tpc-h.md rename to docs/zh-CN/benchmark/tpc-h.md diff --git a/new-docs/zh-CN/data-operate/export/export-manual.md b/docs/zh-CN/data-operate/export/export-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/export/export-manual.md rename to docs/zh-CN/data-operate/export/export-manual.md diff --git a/new-docs/zh-CN/data-operate/export/export_with_mysql_dump.md b/docs/zh-CN/data-operate/export/export_with_mysql_dump.md similarity index 100% rename from new-docs/zh-CN/data-operate/export/export_with_mysql_dump.md rename to docs/zh-CN/data-operate/export/export_with_mysql_dump.md diff --git a/new-docs/zh-CN/data-operate/export/outfile.md b/docs/zh-CN/data-operate/export/outfile.md similarity index 100% rename from new-docs/zh-CN/data-operate/export/outfile.md rename to 
docs/zh-CN/data-operate/export/outfile.md diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/external-storage-load.md b/docs/zh-CN/data-operate/import/import-scenes/external-storage-load.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-scenes/external-storage-load.md rename to docs/zh-CN/data-operate/import/import-scenes/external-storage-load.md diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/external-table-load.md b/docs/zh-CN/data-operate/import/import-scenes/external-table-load.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-scenes/external-table-load.md rename to docs/zh-CN/data-operate/import/import-scenes/external-table-load.md diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/jdbc-load.md b/docs/zh-CN/data-operate/import/import-scenes/jdbc-load.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-scenes/jdbc-load.md rename to docs/zh-CN/data-operate/import/import-scenes/jdbc-load.md diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/kafka-load.md b/docs/zh-CN/data-operate/import/import-scenes/kafka-load.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-scenes/kafka-load.md rename to docs/zh-CN/data-operate/import/import-scenes/kafka-load.md diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/load-atomicity.md b/docs/zh-CN/data-operate/import/import-scenes/load-atomicity.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-scenes/load-atomicity.md rename to docs/zh-CN/data-operate/import/import-scenes/load-atomicity.md diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/load-data-convert.md b/docs/zh-CN/data-operate/import/import-scenes/load-data-convert.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-scenes/load-data-convert.md rename to docs/zh-CN/data-operate/import/import-scenes/load-data-convert.md diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/load-strict-mode.md b/docs/zh-CN/data-operate/import/import-scenes/load-strict-mode.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-scenes/load-strict-mode.md rename to docs/zh-CN/data-operate/import/import-scenes/load-strict-mode.md diff --git a/new-docs/zh-CN/data-operate/import/import-scenes/local-file-load.md b/docs/zh-CN/data-operate/import/import-scenes/local-file-load.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-scenes/local-file-load.md rename to docs/zh-CN/data-operate/import/import-scenes/local-file-load.md diff --git a/new-docs/zh-CN/data-operate/import/import-way/binlog-load-manual.md b/docs/zh-CN/data-operate/import/import-way/binlog-load-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-way/binlog-load-manual.md rename to docs/zh-CN/data-operate/import/import-way/binlog-load-manual.md diff --git a/new-docs/zh-CN/data-operate/import/import-way/broker-load-manual.md b/docs/zh-CN/data-operate/import/import-way/broker-load-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-way/broker-load-manual.md rename to docs/zh-CN/data-operate/import/import-way/broker-load-manual.md diff --git a/new-docs/zh-CN/data-operate/import/import-way/insert-into-manual.md b/docs/zh-CN/data-operate/import/import-way/insert-into-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-way/insert-into-manual.md rename to 
docs/zh-CN/data-operate/import/import-way/insert-into-manual.md diff --git a/new-docs/zh-CN/data-operate/import/import-way/load-json-format.md b/docs/zh-CN/data-operate/import/import-way/load-json-format.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-way/load-json-format.md rename to docs/zh-CN/data-operate/import/import-way/load-json-format.md diff --git a/new-docs/zh-CN/data-operate/import/import-way/routine-load-manual.md b/docs/zh-CN/data-operate/import/import-way/routine-load-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-way/routine-load-manual.md rename to docs/zh-CN/data-operate/import/import-way/routine-load-manual.md diff --git a/new-docs/zh-CN/data-operate/import/import-way/s3-load-manual.md b/docs/zh-CN/data-operate/import/import-way/s3-load-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-way/s3-load-manual.md rename to docs/zh-CN/data-operate/import/import-way/s3-load-manual.md diff --git a/new-docs/zh-CN/data-operate/import/import-way/spark-load-manual.md b/docs/zh-CN/data-operate/import/import-way/spark-load-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-way/spark-load-manual.md rename to docs/zh-CN/data-operate/import/import-way/spark-load-manual.md diff --git a/new-docs/zh-CN/data-operate/import/import-way/stream-load-manual.md b/docs/zh-CN/data-operate/import/import-way/stream-load-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/import-way/stream-load-manual.md rename to docs/zh-CN/data-operate/import/import-way/stream-load-manual.md diff --git a/new-docs/zh-CN/data-operate/import/load-manual.md b/docs/zh-CN/data-operate/import/load-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/import/load-manual.md rename to docs/zh-CN/data-operate/import/load-manual.md diff --git a/new-docs/zh-CN/data-operate/update-delete/batch-delete-manual.md b/docs/zh-CN/data-operate/update-delete/batch-delete-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/update-delete/batch-delete-manual.md rename to docs/zh-CN/data-operate/update-delete/batch-delete-manual.md diff --git a/new-docs/zh-CN/data-operate/update-delete/delete-manual.md b/docs/zh-CN/data-operate/update-delete/delete-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/update-delete/delete-manual.md rename to docs/zh-CN/data-operate/update-delete/delete-manual.md diff --git a/new-docs/zh-CN/data-operate/update-delete/sequence-column-manual.md b/docs/zh-CN/data-operate/update-delete/sequence-column-manual.md similarity index 100% rename from new-docs/zh-CN/data-operate/update-delete/sequence-column-manual.md rename to docs/zh-CN/data-operate/update-delete/sequence-column-manual.md diff --git a/new-docs/zh-CN/data-operate/update-delete/update.md b/docs/zh-CN/data-operate/update-delete/update.md similarity index 100% rename from new-docs/zh-CN/data-operate/update-delete/update.md rename to docs/zh-CN/data-operate/update-delete/update.md diff --git a/new-docs/zh-CN/data-table/advance-usage.md b/docs/zh-CN/data-table/advance-usage.md similarity index 100% rename from new-docs/zh-CN/data-table/advance-usage.md rename to docs/zh-CN/data-table/advance-usage.md diff --git a/new-docs/zh-CN/data-table/basic-usage.md b/docs/zh-CN/data-table/basic-usage.md similarity index 100% rename from new-docs/zh-CN/data-table/basic-usage.md rename to docs/zh-CN/data-table/basic-usage.md diff --git 
a/new-docs/zh-CN/data-table/best-practice.md b/docs/zh-CN/data-table/best-practice.md similarity index 100% rename from new-docs/zh-CN/data-table/best-practice.md rename to docs/zh-CN/data-table/best-practice.md diff --git a/new-docs/zh-CN/data-table/data-model.md b/docs/zh-CN/data-table/data-model.md similarity index 100% rename from new-docs/zh-CN/data-table/data-model.md rename to docs/zh-CN/data-table/data-model.md diff --git a/new-docs/zh-CN/data-table/data-partition.md b/docs/zh-CN/data-table/data-partition.md similarity index 100% rename from new-docs/zh-CN/data-table/data-partition.md rename to docs/zh-CN/data-table/data-partition.md diff --git a/new-docs/zh-CN/data-table/hit-the-rollup.md b/docs/zh-CN/data-table/hit-the-rollup.md similarity index 100% rename from new-docs/zh-CN/data-table/hit-the-rollup.md rename to docs/zh-CN/data-table/hit-the-rollup.md diff --git a/new-docs/zh-CN/data-table/index/bitmap-index.md b/docs/zh-CN/data-table/index/bitmap-index.md similarity index 100% rename from new-docs/zh-CN/data-table/index/bitmap-index.md rename to docs/zh-CN/data-table/index/bitmap-index.md diff --git a/new-docs/zh-CN/data-table/index/bloomfilter.md b/docs/zh-CN/data-table/index/bloomfilter.md similarity index 100% rename from new-docs/zh-CN/data-table/index/bloomfilter.md rename to docs/zh-CN/data-table/index/bloomfilter.md diff --git a/new-docs/zh-CN/data-table/index/prefix-index.md b/docs/zh-CN/data-table/index/prefix-index.md similarity index 100% rename from new-docs/zh-CN/data-table/index/prefix-index.md rename to docs/zh-CN/data-table/index/prefix-index.md diff --git a/new-docs/zh-CN/ecosystem/audit-plugin.md b/docs/zh-CN/ecosystem/audit-plugin.md similarity index 100% rename from new-docs/zh-CN/ecosystem/audit-plugin.md rename to docs/zh-CN/ecosystem/audit-plugin.md diff --git a/new-docs/zh-CN/ecosystem/datax.md b/docs/zh-CN/ecosystem/datax.md similarity index 100% rename from new-docs/zh-CN/ecosystem/datax.md rename to docs/zh-CN/ecosystem/datax.md diff --git a/docs/zh-CN/extending-doris/doris-manager/cluster-managenent.md b/docs/zh-CN/ecosystem/doris-manager/cluster-managenent.md similarity index 100% rename from docs/zh-CN/extending-doris/doris-manager/cluster-managenent.md rename to docs/zh-CN/ecosystem/doris-manager/cluster-managenent.md diff --git a/docs/zh-CN/extending-doris/doris-manager/compiling-deploying.md b/docs/zh-CN/ecosystem/doris-manager/compiling-deploying.md similarity index 100% rename from docs/zh-CN/extending-doris/doris-manager/compiling-deploying.md rename to docs/zh-CN/ecosystem/doris-manager/compiling-deploying.md diff --git a/docs/zh-CN/extending-doris/doris-manager/initializing.md b/docs/zh-CN/ecosystem/doris-manager/initializing.md similarity index 100% rename from docs/zh-CN/extending-doris/doris-manager/initializing.md rename to docs/zh-CN/ecosystem/doris-manager/initializing.md diff --git a/docs/zh-CN/extending-doris/doris-manager/space-list.md b/docs/zh-CN/ecosystem/doris-manager/space-list.md similarity index 100% rename from docs/zh-CN/extending-doris/doris-manager/space-list.md rename to docs/zh-CN/ecosystem/doris-manager/space-list.md diff --git a/docs/zh-CN/extending-doris/doris-manager/space-management.md b/docs/zh-CN/ecosystem/doris-manager/space-management.md similarity index 100% rename from docs/zh-CN/extending-doris/doris-manager/space-management.md rename to docs/zh-CN/ecosystem/doris-manager/space-management.md diff --git a/docs/zh-CN/extending-doris/doris-manager/system-settings.md 
b/docs/zh-CN/ecosystem/doris-manager/system-settings.md similarity index 100% rename from docs/zh-CN/extending-doris/doris-manager/system-settings.md rename to docs/zh-CN/ecosystem/doris-manager/system-settings.md diff --git a/new-docs/zh-CN/ecosystem/external-table/doris-on-es.md b/docs/zh-CN/ecosystem/external-table/doris-on-es.md similarity index 100% rename from new-docs/zh-CN/ecosystem/external-table/doris-on-es.md rename to docs/zh-CN/ecosystem/external-table/doris-on-es.md diff --git a/new-docs/zh-CN/ecosystem/external-table/hive-of-doris.md b/docs/zh-CN/ecosystem/external-table/hive-of-doris.md similarity index 100% rename from new-docs/zh-CN/ecosystem/external-table/hive-of-doris.md rename to docs/zh-CN/ecosystem/external-table/hive-of-doris.md diff --git a/new-docs/zh-CN/ecosystem/external-table/iceberg-of-doris.md b/docs/zh-CN/ecosystem/external-table/iceberg-of-doris.md similarity index 100% rename from new-docs/zh-CN/ecosystem/external-table/iceberg-of-doris.md rename to docs/zh-CN/ecosystem/external-table/iceberg-of-doris.md diff --git a/new-docs/zh-CN/ecosystem/external-table/odbc-of-doris.md b/docs/zh-CN/ecosystem/external-table/odbc-of-doris.md similarity index 100% rename from new-docs/zh-CN/ecosystem/external-table/odbc-of-doris.md rename to docs/zh-CN/ecosystem/external-table/odbc-of-doris.md diff --git a/new-docs/zh-CN/ecosystem/flink-doris-connector.md b/docs/zh-CN/ecosystem/flink-doris-connector.md similarity index 100% rename from new-docs/zh-CN/ecosystem/flink-doris-connector.md rename to docs/zh-CN/ecosystem/flink-doris-connector.md diff --git a/new-docs/zh-CN/ecosystem/logstash.md b/docs/zh-CN/ecosystem/logstash.md similarity index 100% rename from new-docs/zh-CN/ecosystem/logstash.md rename to docs/zh-CN/ecosystem/logstash.md diff --git a/new-docs/zh-CN/ecosystem/plugin-development-manual.md b/docs/zh-CN/ecosystem/plugin-development-manual.md similarity index 100% rename from new-docs/zh-CN/ecosystem/plugin-development-manual.md rename to docs/zh-CN/ecosystem/plugin-development-manual.md diff --git a/new-docs/zh-CN/ecosystem/seatunnel/flink-sink.md b/docs/zh-CN/ecosystem/seatunnel/flink-sink.md similarity index 100% rename from new-docs/zh-CN/ecosystem/seatunnel/flink-sink.md rename to docs/zh-CN/ecosystem/seatunnel/flink-sink.md diff --git a/new-docs/zh-CN/ecosystem/seatunnel/spark-sink.md b/docs/zh-CN/ecosystem/seatunnel/spark-sink.md similarity index 100% rename from new-docs/zh-CN/ecosystem/seatunnel/spark-sink.md rename to docs/zh-CN/ecosystem/seatunnel/spark-sink.md diff --git a/new-docs/zh-CN/ecosystem/spark-doris-connector.md b/docs/zh-CN/ecosystem/spark-doris-connector.md similarity index 100% rename from new-docs/zh-CN/ecosystem/spark-doris-connector.md rename to docs/zh-CN/ecosystem/spark-doris-connector.md diff --git a/new-docs/zh-CN/ecosystem/udf/contribute-udf.md b/docs/zh-CN/ecosystem/udf/contribute-udf.md similarity index 100% rename from new-docs/zh-CN/ecosystem/udf/contribute-udf.md rename to docs/zh-CN/ecosystem/udf/contribute-udf.md diff --git a/new-docs/zh-CN/ecosystem/udf/native-user-defined-function.md b/docs/zh-CN/ecosystem/udf/native-user-defined-function.md similarity index 100% rename from new-docs/zh-CN/ecosystem/udf/native-user-defined-function.md rename to docs/zh-CN/ecosystem/udf/native-user-defined-function.md diff --git a/new-docs/zh-CN/ecosystem/udf/remote-user-defined-function.md b/docs/zh-CN/ecosystem/udf/remote-user-defined-function.md similarity index 100% rename from 
new-docs/zh-CN/ecosystem/udf/remote-user-defined-function.md rename to docs/zh-CN/ecosystem/udf/remote-user-defined-function.md diff --git a/docs/zh-CN/extending-doris/audit-plugin.md b/docs/zh-CN/extending-doris/audit-plugin.md deleted file mode 100644 index df9387da87..0000000000 --- a/docs/zh-CN/extending-doris/audit-plugin.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -{ - "title": "审计日志插件", - "language": "zh-CN" -} ---- - - - -# 审计日志插件 - -Doris 的审计日志插件是在 FE 的插件框架基础上开发的。是一个可选插件。用户可以在运行时安装或卸载这个插件。 - -该插件可以将 FE 的审计日志定期的导入到指定 Doris 集群中,以方便用户通过 SQL 对审计日志进行查看和分析。 - -## 编译、配置和部署 - -### FE 配置 - -FE的插件框架当前是实验性功能,Doris中默认关闭,在FE的配置文件中,增加`plugin_enable = true`启用plugin框架 - -### AuditLoader 配置 - -auditloader plugin的配置位于`${DORIS}/fe_plugins/auditloader/src/main/assembly/`. - -打开 `plugin.conf` 进行配置。配置项说明参见注释。 - -### 编译 - -在 Doris 代码目录下执行 `sh build_plugin.sh` 后,会在 `fe_plugins/output` 目录下得到 `auditloader.zip` 文件。 - -### 部署 - -您可以将这个文件放置在一个 http 服务器上,或者拷贝`auditloader.zip`(或者解压`auditloader.zip`)到所有 FE 的指定目录下。这里我们使用后者。 - -### 安装 - -部署完成后,安装插件前,需要创建之前在 `plugin.conf` 中指定的审计数据库和表。其中建表语句如下: - -``` -create table doris_audit_tbl__ -( - query_id varchar(48) comment "Unique query id", - time datetime not null comment "Query start time", - client_ip varchar(32) comment "Client IP", - user varchar(64) comment "User name", - db varchar(96) comment "Database of this query", - state varchar(8) comment "Query result state. EOF, ERR, OK", - query_time bigint comment "Query execution time in millisecond", - scan_bytes bigint comment "Total scan bytes of this query", - scan_rows bigint comment "Total scan rows of this query", - return_rows bigint comment "Returned rows of this query", - stmt_id int comment "An incremental id of statement", - is_query tinyint comment "Is this statemt a query. 
1 or 0", - frontend_ip varchar(32) comment "Frontend ip of executing this statement", - cpu_time_ms bigint comment "Total scan cpu time in millisecond of this query", - sql_hash varchar(48) comment "Hash value for this query", - peak_memory_bytes bigint comment "Peak memory bytes used on all backends of this query", - stmt string comment "The original statement, trimed if longer than 2G " -) engine=OLAP -duplicate key(query_id, time, client_ip) -partition by range(time) () -distributed by hash(query_id) buckets 1 -properties( - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-30", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "1", - "dynamic_partition.enable" = "true", - "replication_num" = "3" -); -``` - ->**注意** -> -> 上面表结构中:stmt string ,这个只能在0.15及之后版本中使用,之前版本,字段类型使用varchar - -其中 `dynamic_partition` 属性根据自己的需要,选择审计日志保留的天数。 - -之后,连接到 Doris 后使用 `INSTALL PLUGIN` 命令完成安装。安装成功后,可以通过 `SHOW PLUGINS` 看到已经安装的插件,并且状态为 `INSTALLED`。 - -完成后,插件会不断的以指定的时间间隔将审计日志插入到这个表中。 - - - - - - - - - - - - - - - - - - - diff --git a/docs/zh-CN/extending-doris/datax.md b/docs/zh-CN/extending-doris/datax.md deleted file mode 100644 index 6c11f5b94d..0000000000 --- a/docs/zh-CN/extending-doris/datax.md +++ /dev/null @@ -1,104 +0,0 @@ ---- -{ - "title": "DataX doriswriter", - "language": "zh-CN" -} ---- - - - -# DataX doriswriter - -[DataX](https://github.com/alibaba/DataX) doriswriter 插件,用于通过 DataX 同步其他数据源的数据到 Doris 中。 - -这个插件是利用Doris的Stream Load 功能进行数据导入的。需要配合 DataX 服务一起使用。 - -## 关于 DataX - -DataX 是阿里云 DataWorks数据集成 的开源版本,在阿里巴巴集团内被广泛使用的离线数据同步工具/平台。DataX 实现了包括 MySQL、Oracle、SqlServer、Postgre、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、Hologres、DRDS 等各种异构数据源之间高效的数据同步功能。 - -更多信息请参阅: `https://github.com/alibaba/DataX/` - -## 使用手册 - -DataX doriswriter 插件代码 [这里](https://github.com/apache/incubator-doris/tree/master/extension/DataX)。 - -这个目录包含插件代码以及 DataX 项目的开发环境。 - -doriswriter 插件依赖的 DataX 代码中的一些模块。而这些模块并没有在 Maven 官方仓库中。所以我们在开发 doriswriter 插件时,需要下载完整的 DataX 代码库,才能进行插件的编译和开发。 - -### 目录结构 - -1. `doriswriter/` - - 这个目录是 doriswriter 插件的代码目录。这个目录中的所有代码,都托管在 Apache Doris 的代码库中。 - - doriswriter 插件帮助文档在这里:`doriswriter/doc` - -2. `init-env.sh` - - 这个脚本主要用于构建 DataX 开发环境,他主要进行了以下操作: - - 1. 将 DataX 代码库 clone 到本地。 - 2. 将 `doriswriter/` 目录软链到 `DataX/doriswriter` 目录。 - 3. 在 `DataX/pom.xml` 文件中添加 `doriswriter` 模块。 - 4. 将 `DataX/core/pom.xml` 文件中的 httpclient 版本从 4.5 改为 4.5.13. - - > httpclient v4.5 在处理 307 转发时有bug。 - - 这个脚本执行后,开发者就可以进入 `DataX/` 目录开始开发或编译了。因为做了软链,所以任何对 `DataX/doriswriter` 目录中文件的修改,都会反映到 `doriswriter/` 目录中,方便开发者提交代码。 - -### 编译 - -1. 运行 `init-env.sh` -2. 按需修改 `DataX/doriswriter` 中的代码。 -3. 编译 doriswriter: - - 1. 单独编译 doriswriter 插件: - - `mvn clean install -pl plugin-rdbms-util,doriswriter -DskipTests` - - 2. 编译整个 DataX 项目: - - `mvn package assembly:assembly -Dmaven.test.skip=true` - - 产出在 `target/datax/datax/`. - - > hdfsreader, hdfswriter and oscarwriter 这三个插件需要额外的jar包。如果你并不需要这些插件,可以在 `DataX/pom.xml` 中删除这些插件的模块。 - - 3. 编译错误 - - 如遇到如下编译错误: - - ``` - Could not find artifact com.alibaba.datax:datax-all:pom:0.0.1-SNAPSHOT ... - ``` - - 可尝试以下方式解决: - - 1. 下载 [alibaba-datax-maven-m2-20210928.tar.gz](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/alibaba-datax-maven-m2-20210928.tar.gz) - 2. 解压后,将得到的 `alibaba/datax/` 目录,拷贝到所使用的 maven 对应的 `.m2/repository/com/alibaba/` 下。 - 3. 再次尝试编译。 - -4. 
按需提交修改。 - -### 示例 - -doriswriter 插件的使用说明请参阅 [这里](https://github.com/apache/incubator-doris/blob/master/extension/DataX/doriswriter/doc/doriswriter.md) diff --git a/docs/zh-CN/extending-doris/doris-on-es.md b/docs/zh-CN/extending-doris/doris-on-es.md deleted file mode 100644 index 7840b6be27..0000000000 --- a/docs/zh-CN/extending-doris/doris-on-es.md +++ /dev/null @@ -1,588 +0,0 @@ ---- -{ - "title": "Doris On ES", - "language": "zh-CN" -} ---- - - - -# Doris On ES - -Doris-On-ES将Doris的分布式查询规划能力和ES(Elasticsearch)的全文检索能力相结合,提供更完善的OLAP分析场景解决方案: - - 1. ES中的多index分布式Join查询 - 2. Doris和ES中的表联合查询,更复杂的全文检索过滤 - -本文档主要介绍该功能的实现原理、使用方式等。 - -## 名词解释 - -### Doris相关 -* FE:Frontend,Doris 的前端节点,负责元数据管理和请求接入 -* BE:Backend,Doris 的后端节点,负责查询执行和数据存储 - -### ES相关 -* DataNode:ES的数据存储与计算节点 -* MasterNode:ES的Master节点,管理元数据、节点、数据分布等 -* scroll:ES内置的数据集游标特性,用来对数据进行流式扫描和过滤 -* _source: 导入时传入的原始JSON格式文档内容 -* doc_values: ES/Lucene 中字段的列式存储定义 -* keyword: 字符串类型字段,ES/Lucene不会对文本内容进行分词处理 -* text: 字符串类型字段,ES/Lucene会对文本内容进行分词处理,分词器需要用户指定,默认为standard英文分词器 - - -## 使用方法 - -### 创建ES索引 - -``` -PUT test -{ - "settings": { - "index": { - "number_of_shards": "1", - "number_of_replicas": "0" - } - }, - "mappings": { - "doc": { // ES 7.x版本之后创建索引时不需要指定type,会有一个默认且唯一的`_doc` type - "properties": { - "k1": { - "type": "long" - }, - "k2": { - "type": "date" - }, - "k3": { - "type": "keyword" - }, - "k4": { - "type": "text", - "analyzer": "standard" - }, - "k5": { - "type": "float" - } - } - } - } -} -``` - -### ES索引导入数据 - -``` -POST /_bulk -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "Trying out Elasticsearch", "k4": "Trying out Elasticsearch", "k5": 10.0} -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "Trying out Doris", "k4": "Trying out Doris", "k5": 10.0} -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "Doris On ES", "k4": "Doris On ES", "k5": 10.0} -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "Doris", "k4": "Doris", "k5": 10.0} -{"index":{"_index":"test","_type":"doc"}} -{ "k1" : 100, "k2": "2020-01-01", "k3": "ES", "k4": "ES", "k5": 10.0} -``` - -### Doris中创建ES外表 - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH // ENGINE必须是Elasticsearch -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", - -"user" = "root", -"password" = "root" -); -``` - -参数说明: - -参数 | 说明 ----|--- -**hosts** | ES集群地址,可以是一个或多个,也可以是ES前端的负载均衡地址 -**index** | 对应的ES的index名字,支持alias,如果使用doc_value,需要使用真实的名称 -**type** | index的type,不指定的情况会使用_doc -**user** | ES集群用户名 -**password** | 对应用户的密码信息 - -* ES 7.x之前的集群请注意在建表的时候选择正确的**索引类型type** -* 认证方式目前仅支持Http Basic认证,并且需要确保该用户有访问: /\_cluster/state/、\_nodes/http等路径和index的读权限; 集群未开启安全认证,用户名和密码不需要设置 -* Doris表中的列名需要和ES中的字段名完全匹配,字段类型应该保持一致 -* **ENGINE**必须是 **Elasticsearch** - -##### 过滤条件下推 -`Doris On ES`一个重要的功能就是过滤条件的下推: 过滤条件下推给ES,这样只有真正满足条件的数据才会被返回,能够显著的提高查询性能和降低Doris和Elasticsearch的CPU、memory、IO使用量 - -下面的操作符(Operators)会被优化成如下ES Query: - -| SQL syntax | ES 5.x+ syntax | -|-------|:---:| -| = | term query| -| in | terms query | -| > , < , >= , ⇐ | range query | -| and | bool.filter | -| or | bool.should | -| not | bool.must_not | -| not in | bool.must_not + terms query | -| is\_not\_null | exists query | -| is\_null | bool.must_not + exists query | -| esquery | ES原生json形式的QueryDSL | - 
-##### 数据类型映射 - -Doris\ES | byte | short | integer | long | float | double| keyword | text | date -------------- | ------------- | ------ | ---- | ----- | ---- | ------ | ----| --- | --- | -tinyint | √ | | | | | | | | -smallint | √ | √ | | | | | | | -int | √ | √ | √ | | | | | | -bigint | √ | √ | √ | √ | | | | | -float | | | | | √ | | | | -double | | | | | | √ | | | -char | | | | | | | √ | √ | -varchar | | | | | | | √ | √ | -date | | | | | | | | | √| -datetime | | | | | | | | | √| - - -### 启用列式扫描优化查询速度(enable\_docvalue\_scan=true) - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", -"user" = "root", -"password" = "root", - -"enable_docvalue_scan" = "true" -); -``` - -参数说明: - -参数 | 说明 ----|--- -**enable\_docvalue\_scan** | 是否开启通过ES/Lucene列式存储获取查询字段的值,默认为false - -开启后Doris从ES中获取数据会遵循以下两个原则: - -* **尽力而为**: 自动探测要读取的字段是否开启列式存储(doc_value: true),如果获取的字段全部有列存,Doris会从列式存储中获取所有字段的值 -* **自动降级**: 如果要获取的字段只要有一个字段没有列存,所有字段的值都会从行存`_source`中解析获取 - -##### 优势: - -默认情况下,Doris On ES会从行存也就是`_source`中获取所需的所有列,`_source`的存储采用的行式+json的形式存储,在批量读取性能上要劣于列式存储,尤其在只需要少数列的情况下尤为明显,只获取少数列的情况下,docvalue的性能大约是_source性能的十几倍 - -##### 注意 -1. `text`类型的字段在ES中是没有列式存储,因此如果要获取的字段值有`text`类型字段会自动降级为从`_source`中获取 -2. 在获取的字段数量过多的情况下(`>= 25`),从`docvalue`中获取字段值的性能会和从`_source`中获取字段值基本一样 - - -### 探测keyword类型字段(enable\_keyword\_sniff=true) - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", -"user" = "root", -"password" = "root", - -"enable_keyword_sniff" = "true" -); -``` - -参数说明: - -参数 | 说明 ----|--- -**enable\_keyword\_sniff** | 是否对ES中字符串类型分词类型(**text**) `fields` 进行探测,获取额外的未分词(**keyword**)字段名(multi-fields机制) - -在ES中可以不建立index直接进行数据导入,这时候ES会自动创建一个新的索引,针对字符串类型的字段ES会创建一个既有`text`类型的字段又有`keyword`类型的字段,这就是ES的multi fields特性,mapping如下: - -``` -"k4": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } -} -``` -对k4进行条件过滤时比如=,Doris On ES会将查询转换为ES的TermQuery - -SQL过滤条件: - -``` -k4 = "Doris On ES" -``` - -转换成ES的query DSL为: - -``` -"term" : { - "k4": "Doris On ES" - -} -``` - -因为k4的第一字段类型为`text`,在数据导入的时候就会根据k4设置的分词器(如果没有设置,就是standard分词器)进行分词处理得到doris、on、es三个Term,如下ES analyze API分析: - -``` -POST /_analyze -{ - "analyzer": "standard", - "text": "Doris On ES" -} -``` -分词的结果是: - -``` -{ - "tokens": [ - { - "token": "doris", - "start_offset": 0, - "end_offset": 5, - "type": "", - "position": 0 - }, - { - "token": "on", - "start_offset": 6, - "end_offset": 8, - "type": "", - "position": 1 - }, - { - "token": "es", - "start_offset": 9, - "end_offset": 11, - "type": "", - "position": 2 - } - ] -} -``` -查询时使用的是: - -``` -"term" : { - "k4": "Doris On ES" -} -``` -`Doris On ES`这个term匹配不到词典中的任何term,不会返回任何结果,而启用`enable_keyword_sniff: true`会自动将`k4 = "Doris On ES"`转换成`k4.keyword = "Doris On ES"`来完全匹配SQL语义,转换后的ES query DSL为: - -``` -"term" : { - "k4.keyword": "Doris On ES" -} -``` - -`k4.keyword` 的类型是`keyword`,数据写入ES中是一个完整的term,所以可以匹配 - -### 开启节点自动发现, 默认为true(es\_nodes\_discovery=true) - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) 
COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", -"user" = "root", -"password" = "root", - -"nodes_discovery" = "true" -); -``` - -参数说明: - -参数 | 说明 ----|--- -**es\_nodes\_discovery** | 是否开启es节点发现,默认为true - -当配置为true时,Doris将从ES找到所有可用的相关数据节点(在上面分配的分片)。如果ES数据节点的地址没有被Doris BE访问,则设置为false。ES集群部署在与公共Internet隔离的内网,用户通过代理访问 - -### ES集群是否开启https访问模式,如果开启应设置为`true`,默认为false(http\_ssl\_enabled=true) - -``` -CREATE EXTERNAL TABLE `test` ( - `k1` bigint(20) COMMENT "", - `k2` datetime COMMENT "", - `k3` varchar(20) COMMENT "", - `k4` varchar(100) COMMENT "", - `k5` float COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://192.168.0.1:8200,http://192.168.0.2:8200", -"index" = "test", -"type" = "doc", -"user" = "root", -"password" = "root", - -"http_ssl_enabled" = "true" -); -``` - -参数说明: - -参数 | 说明 ----|--- -**http\_ssl\_enabled** | ES集群是否开启https访问模式 - -目前会fe/be实现方式为信任所有,这是临时解决方案,后续会使用真实的用户配置证书 - -### 查询用法 - -完成在Doris中建立ES外表后,除了无法使用Doris中的数据模型(rollup、预聚合、物化视图等)外并无区别 - -#### 基本查询 - -``` -select * from es_table where k1 > 1000 and k3 ='term' or k4 like 'fu*z_' -``` - -#### 扩展的esquery(field, QueryDSL) -通过`esquery(field, QueryDSL)`函数将一些无法用sql表述的query如match_phrase、geoshape等下推给ES进行过滤处理,`esquery`的第一个列名参数用于关联`index`,第二个参数是ES的基本`Query DSL`的json表述,使用花括号`{}`包含,json的`root key`有且只能有一个,如match_phrase、geo_shape、bool等 - -match_phrase查询: - -``` -select * from es_table where esquery(k4, '{ - "match_phrase": { - "k4": "doris on es" - } - }'); -``` -geo相关查询: - -``` -select * from es_table where esquery(k4, '{ - "geo_shape": { - "location": { - "shape": { - "type": "envelope", - "coordinates": [ - [ - 13, - 53 - ], - [ - 14, - 52 - ] - ] - }, - "relation": "within" - } - } - }'); -``` - -bool查询: - -``` -select * from es_table where esquery(k4, ' { - "bool": { - "must": [ - { - "terms": { - "k1": [ - 11, - 12 - ] - } - }, - { - "terms": { - "k2": [ - 100 - ] - } - } - ] - } - }'); -``` - - - -## 原理 - -``` -+----------------------------------------------+ -| | -| Doris +------------------+ | -| | FE +--------------+-------+ -| | | Request Shard Location -| +--+-------------+-+ | | -| ^ ^ | | -| | | | | -| +-------------------+ +------------------+ | | -| | | | | | | | | -| | +----------+----+ | | +--+-----------+ | | | -| | | BE | | | | BE | | | | -| | +---------------+ | | +--------------+ | | | -+----------------------------------------------+ | - | | | | | | | - | | | | | | | - | HTTP SCROLL | | HTTP SCROLL | | -+-----------+---------------------+------------+ | -| | v | | v | | | -| | +------+--------+ | | +------+-------+ | | | -| | | | | | | | | | | -| | | DataNode | | | | DataNode +<-----------+ -| | | | | | | | | | | -| | | +<--------------------------------+ -| | +---------------+ | | |--------------| | | | -| +-------------------+ +------------------+ | | -| Same Physical Node | | -| | | -| +-----------------------+ | | -| | | | | -| | MasterNode +<-----------------+ -| ES | | | -| +-----------------------+ | -+----------------------------------------------+ - - -``` - -1. 创建ES外表后,FE会请求建表指定的主机,获取所有节点的HTTP端口信息以及index的shard分布信息等,如果请求失败会顺序遍历host列表直至成功或完全失败 - -2. 查询时会根据FE得到的一些节点信息和index的元数据信息,生成查询计划并发给对应的BE节点 - -3. BE节点会根据`就近原则`即优先请求本地部署的ES节点,BE通过`HTTP Scroll`方式流式的从ES index的每个分片中并发的从`_source`或`docvalue`中获取数据 - -4. 
Doris计算完结果后,返回给用户 - -## 最佳实践 - -### 时间类型字段使用建议 - -在ES中,时间类型的字段使用十分灵活,但是在Doris On ES中如果对时间类型字段的类型设置不当,则会造成过滤条件无法下推 - -创建索引时对时间类型格式的设置做最大程度的格式兼容: - -``` - "dt": { - "type": "date", - "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis" - } -``` - -在Doris中建立该字段时建议设置为`date`或`datetime`,也可以设置为`varchar`类型, 使用如下SQL语句都可以直接将过滤条件下推至ES: - -``` -select * from doe where k2 > '2020-06-21'; - -select * from doe where k2 < '2020-06-21 12:00:00'; - -select * from doe where k2 < 1593497011; - -select * from doe where k2 < now(); - -select * from doe where k2 < date_format(now(), '%Y-%m-%d'); -``` - -注意: - -* 在ES中如果不对时间类型的字段设置`format`, 默认的时间类型字段格式为 - -``` -strict_date_optional_time||epoch_millis -``` - -* 导入到ES的日期字段如果是时间戳需要转换成`ms`, ES内部处理时间戳都是按照`ms`进行处理的, 否则Doris On ES会出现显示错误 - -### 获取ES元数据字段`_id` - -导入文档在不指定`_id`的情况下ES会给每个文档分配一个全局唯一的`_id`即主键, 用户也可以在导入时为文档指定一个含有特殊业务意义的`_id`; 如果需要在Doris On ES中获取该字段值,建表时可以增加类型为`varchar`的`_id`字段: - -``` -CREATE EXTERNAL TABLE `doe` ( - `_id` varchar COMMENT "", - `city` varchar COMMENT "" -) ENGINE=ELASTICSEARCH -PROPERTIES ( -"hosts" = "http://127.0.0.1:8200", -"user" = "root", -"password" = "root", -"index" = "doe", -"type" = "doc" -} -``` - -注意: - -1. `_id`字段的过滤条件仅支持`=`和`in`两种 -2. `_id`字段只能是`varchar`类型 - -## Q&A - -1. Doris On ES对ES的版本要求 - - ES主版本大于5,ES在2.x之前和5.x之后数据的扫描方式不同,目前支持仅5.x之后的 - -2. 是否支持X-Pack认证的ES集群 - - 支持所有使用HTTP Basic认证方式的ES集群 -3. 一些查询比请求ES慢很多 - - 是,比如_count相关的query等,ES内部会直接读取满足条件的文档个数相关的元数据,不需要对真实的数据进行过滤 - -4. 聚合操作是否可以下推 - - 目前Doris On ES不支持聚合操作如sum, avg, min/max 等下推,计算方式是批量流式的从ES获取所有满足条件的文档,然后在Doris中进行计算 - diff --git a/docs/zh-CN/extending-doris/flink-doris-connector.md b/docs/zh-CN/extending-doris/flink-doris-connector.md deleted file mode 100644 index cd12cdd886..0000000000 --- a/docs/zh-CN/extending-doris/flink-doris-connector.md +++ /dev/null @@ -1,497 +0,0 @@ ---- -{ - - "title": "Flink Doris Connector", - "language": "zh-CN" - -} ---- - - - -# Flink Doris Connector - -Flink Doris Connector 可以支持通过 Flink 操作(读取、插入、修改、删除) Doris 中存储的数据。 - -代码库地址:https://github.com/apache/incubator-doris-flink-connector - -* 可以将 `Doris` 表映射为 `DataStream` 或者 `Table`。 - ->**注意:** -> ->1. 修改和删除只支持在 Unique Key 模型上 ->2. 目前的删除是支持 Flink CDC 的方式接入数据实现自动删除,如果是其他数据接入的方式删除需要自己实现。Flink CDC 的数据删除使用方式参照本文档最后一节 - -## 版本兼容 - -| Connector | Flink | Doris | Java | Scala | -| --------- | ----- | ------ | ---- |------| -| 1.11.6-2.12-xx | 1.11.x | 0.13+ | 8 | 2.12 | -| 1.12.7-2.12-xx | 1.12.x | 0.13.+ | 8 | 2.12 | -| 1.13.5-2.12-xx | 1.13.x | 0.13.+ | 8 | 2.12 | -| 1.14.4-2.12-xx | 1.14.x | 0.13.+ | 8 | 2.12 | - -## 编译与安装 - -准备工作 - -1.修改`custom_env.sh.tpl`文件,重命名为`custom_env.sh` - -2.指定thrift安装目录 - -```bash -##源文件内容 -#export THRIFT_BIN= -#export MVN_BIN= -#export JAVA_HOME= - -##修改如下,MacOS为例 -export THRIFT_BIN=/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift -#export MVN_BIN= -#export JAVA_HOME= - -安装 `thrift` 0.13.0 版本(注意:`Doris` 0.15 和最新的版本基于 `thrift` 0.13.0 构建, 之前的版本依然使用`thrift` 0.9.3 构建) - Windows: - 1.下载:`http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe`(下载目录自己指定) - 2.修改thrift-0.13.0.exe 为 thrift - - MacOS: - 1. 下载:`brew install thrift@0.13.0` - 2. 默认下载地址:/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift - - - 注:MacOS执行 `brew install thrift@0.13.0` 可能会报找不到版本的错误,解决方法如下,在终端执行: - 1. `brew tap-new $USER/local-tap` - 2. `brew extract --version='0.13.0' thrift $USER/local-tap` - 3. 
`brew install thrift@0.13.0` - 参考链接: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c` - - Linux: - 1.下载源码包:`wget https://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz` - 2.安装依赖:`yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++` - 3.`tar zxvf thrift-0.13.0.tar.gz` - 4.`cd thrift-0.13.0` - 5.`./configure --without-tests` - 6.`make` - 7.`make install` - 安装完成后查看版本:thrift --version - 注:如果编译过Doris,则不需要安装thrift,可以直接使用 $DORIS_HOME/thirdparty/installed/bin/thrift -``` - -在源码目录下执行: - -```bash -sh build.sh - - Usage: - build.sh --flink version --scala version # specify flink and scala version - build.sh --tag # this is a build from tag - e.g.: - build.sh --flink 1.14.3 --scala 2.12 - build.sh --tag - -然后按照你需要版本执行命令编译即可,例如: -sh build.sh --flink 1.14.3 --scala 2.12 -``` - -> 注:如果你是从 tag 检出的源码,则可以直接执行 `sh build.sh --tag`,而无需指定 flink 和 scala 的版本。因为 tag 源码中的版本是固定的。比如 `1.13.5-2.12-1.0.1` 表示 flink 版本 1.13.5,scala 版本 2.12,connector 版本 1.0.1。 - -编译成功后,会在 `target/` 目录下生成文件,如:`flink-doris-connector-1.14_2.12-1.0.0-SNAPSHOT.jar` 。将此文件复制到 `Flink` 的 `ClassPath` 中即可使用 `Flink-Doris-Connector` 。例如, `Local` 模式运行的 `Flink` ,将此文件放入 `jars/` 文件夹下。 `Yarn` 集群模式运行的 `Flink` ,则将此文件放入预部署包中。 - -**备注** - -1. Doris FE 要在配置中配置启用 http v2 -2. Scala 版本目前支持2.12和2.11 - -conf/fe.conf - -``` -enable_http_server_v2 = true -``` - -## 使用 Maven 管理 - -添加 flink-doris-connector 和必要的 Flink Maven 依赖 - -Flink 1.13.* 及以前的版本 -``` - - org.apache.flink - flink-java - ${flink.version} - provided - - - org.apache.flink - flink-streaming-java_${scala.version} - ${flink.version} - provided - - - org.apache.flink - flink-clients_${scala.version} - ${flink.version} - provided - - - - org.apache.flink - flink-table-common - ${flink.version} - provided - - - org.apache.flink - flink-table-api-java-bridge_${scala.version} - ${flink.version} - provided - - - org.apache.flink - flink-table-planner-blink_${scala.version} - ${flink.version} - provided - - - - org.apache.doris - flink-doris-connector-1.13_2.12 - - - 1.0.3 - -``` -Flink 1.14.* 版本 -``` - - org.apache.flink - flink-java - ${flink.version} - provided - - - org.apache.flink - flink-streaming-java_${scala.version} - ${flink.version} - provided - - - org.apache.flink - flink-clients_${scala.version} - ${flink.version} - provided - - - - org.apache.flink - flink-table-planner_${scala.version} - ${flink.version} - provided - - - - org.apache.doris - flink-doris-connector-1.14_2.12 - 1.0.3 - -``` - -**备注** - -1.请根据不同的 Flink 和 Scala 版本替换对应的 Connector 和 Flink 依赖版本。 -2.目前maven中仅提供了scala2.12版本的包,2.11版本的包需要自行编译,参考上面编译安装小节。 - -## 使用方法 - -Flink 读写 Doris 数据主要有三种方式 - -* SQL -* DataStream -* DataSet - -### 参数配置 - -Flink Doris Connector Sink 的内部实现是通过 `Stream Load` 服务向 Doris 写入数据, 同时也支持 `Stream Load` 请求参数的配置设定 - -参数配置方法 -* SQL 使用 `WITH` 参数 `sink.properties.` 配置 -* DataStream 使用方法`DorisExecutionOptions.builder().setStreamLoadProp(Properties)`配置 - -### SQL - -* Source - -```sql -CREATE TABLE flink_doris_source ( - name STRING, - age INT, - price DECIMAL(5,2), - sale DOUBLE - ) - WITH ( - 'connector' = 'doris', - 'fenodes' = '$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT', - 'table.identifier' = '$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME', - 'username' = '$YOUR_DORIS_USERNAME', - 'password' = '$YOUR_DORIS_PASSWORD' -); -``` - -* Sink - -```sql -CREATE TABLE flink_doris_sink ( - name STRING, - age INT, - price DECIMAL(5,2), - sale DOUBLE - ) - WITH ( - 'connector' = 'doris', - 'fenodes' = 
'$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT', - 'table.identifier' = '$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME', - 'username' = '$YOUR_DORIS_USERNAME', - 'password' = '$YOUR_DORIS_PASSWORD' -); -``` - -* Insert - -```sql -INSERT INTO flink_doris_sink select name,age,price,sale from flink_doris_source -``` - -### DataStream - -* Source - -```java - Properties properties = new Properties(); - properties.put("fenodes","FE_IP:8030"); - properties.put("username","root"); - properties.put("password",""); - properties.put("table.identifier","db.table"); - env.addSource(new DorisSourceFunction( - new DorisStreamOptions(properties), - new SimpleListDeserializationSchema() - ) - ).print(); -``` - -* Sink - -Json 数据流 - -```java -Properties pro = new Properties(); -pro.setProperty("format", "json"); -pro.setProperty("strip_outer_array", "true"); -env.fromElements( - "{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}" - ) - .addSink( - DorisSink.sink( - DorisReadOptions.builder().build(), - DorisExecutionOptions.builder() - .setBatchSize(3) - .setBatchIntervalMs(0l) - .setMaxRetries(3) - .setStreamLoadProp(pro).build(), - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); - -``` - -Json 数据流 - -```java -env.fromElements( - "{\"longitude\": \"116.405419\", \"city\": \"北京\", \"latitude\": \"39.916927\"}" - ) - .addSink( - DorisSink.sink( - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); -``` - -RowData 数据流 - -```java -DataStream source = env.fromElements("") - .map(new MapFunction() { - @Override - public RowData map(String value) throws Exception { - GenericRowData genericRowData = new GenericRowData(3); - genericRowData.setField(0, StringData.fromString("北京")); - genericRowData.setField(1, 116.405419); - genericRowData.setField(2, 39.916927); - return genericRowData; - } - }); - -String[] fields = {"city", "longitude", "latitude"}; -LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()}; - -source.addSink( - DorisSink.sink( - fields, - types, - DorisReadOptions.builder().build(), - DorisExecutionOptions.builder() - .setBatchSize(3) - .setBatchIntervalMs(0L) - .setMaxRetries(3) - .build(), - DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build() - )); -``` - -### DataSet - -* Sink - -```java -MapOperator data = env.fromElements("") - .map(new MapFunction() { - @Override - public RowData map(String value) throws Exception { - GenericRowData genericRowData = new GenericRowData(3); - genericRowData.setField(0, StringData.fromString("北京")); - genericRowData.setField(1, 116.405419); - genericRowData.setField(2, 39.916927); - return genericRowData; - } - }); - -DorisOptions dorisOptions = DorisOptions.builder() - .setFenodes("FE_IP:8030") - .setTableIdentifier("db.table") - .setUsername("root") - .setPassword("").build(); -DorisReadOptions readOptions = DorisReadOptions.defaults(); -DorisExecutionOptions executionOptions = DorisExecutionOptions.defaults(); - -LogicalType[] types = {new VarCharType(), new DoubleType(), new DoubleType()}; -String[] fields = {"city", "longitude", "latitude"}; - -DorisDynamicOutputFormat outputFormat = new DorisDynamicOutputFormat( - dorisOptions, readOptions, executionOptions, types, fields - ); - -outputFormat.open(0, 1); -data.output(outputFormat); 
-outputFormat.close(); -``` - -## 配置 - -### 通用配置项 - -| Key | Default Value | Comment | -| -------------------------------- | ----------------- | ------------------------------------------------------------ | -| fenodes | -- | Doris FE http 地址 | -| table.identifier | -- | Doris 表名,如:db1.tbl1 | -| username | -- | 访问 Doris 的用户名 | -| password | -- | 访问 Doris 的密码 | -| doris.request.retries | 3 | 向 Doris 发送请求的重试次数 | -| doris.request.connect.timeout.ms | 30000 | 向 Doris 发送请求的连接超时时间 | -| doris.request.read.timeout.ms | 30000 | 向 Doris 发送请求的读取超时时间 | -| doris.request.query.timeout.s | 3600 | 查询 Doris 的超时时间,默认值为1小时,-1表示无超时限制 | -| doris.request.tablet.size | Integer. MAX_VALUE | 一个 Partition 对应的 Doris Tablet 个数。
此数值设置越小,则会生成越多的 Partition,从而提升 Flink 侧的并行度,但同时会对 Doris 造成更大的压力。 | 
-| doris.batch.size | 1024 | 一次从 BE 读取数据的最大行数。增大此数值可减少 Flink 与 Doris 之间建立连接的次数,从而减轻网络延迟所带来的额外时间开销。 | 
-| doris.exec.mem.limit | 2147483648 | 单个查询的内存限制。默认为 2GB,单位为字节 | 
-| doris.deserialize.arrow.async | false | 是否支持异步转换 Arrow 格式到 flink-doris-connector 迭代所需的 RowBatch | 
-| doris.deserialize.queue.size | 64 | 异步转换 Arrow 格式的内部处理队列,当 doris.deserialize.arrow.async 为 true 时生效 | 
-| doris.read.field | -- | 读取 Doris 表的列名列表,多列之间使用逗号分隔 | 
-| doris.filter.query | -- | 过滤读取数据的表达式,此表达式透传给 Doris。Doris 使用此表达式完成源端数据过滤。 | 
-| sink.batch.size | 10000 | 单次写 BE 的最大行数 | 
-| sink.max-retries | 1 | 写 BE 失败之后的重试次数 | 
-| sink.batch.interval | 10s | flush 间隔时间,超过该时间后异步线程将缓存中数据写入 BE。默认值为10秒,支持时间单位 ms、s、min、h 和 d。设置为 0 表示关闭定期写入。 | 
-| sink.properties.* | -- | Stream Load 的导入参数。例如:<br>'sink.properties.column_separator' = ', ' 定义列分隔符;<br>'sink.properties.escape_delimiters' = 'true' 将特殊字符作为分隔符,'\\x01' 会被转换为二进制的 0x01;<br>'sink.properties.format' = 'json' 与 'sink.properties.strip_outer_array' = 'true' 表示以
JSON格式导入| -| sink.enable-delete | true | 是否启用删除。此选项需要 Doris 表开启批量删除功能(0.15+版本默认开启),只支持 Unique 模型。| -| sink.batch.bytes | 10485760 | 单次写 BE 的最大数据量,当每个 batch 中记录的数据量超过该阈值时,会将缓存数据写入 BE。默认值为 10MB | -## Doris 和 Flink 列类型映射关系 - -| Doris Type | Flink Type | -| ---------- | -------------------------------- | -| NULL_TYPE | NULL | -| BOOLEAN | BOOLEAN | -| TINYINT | TINYINT | -| SMALLINT | SMALLINT | -| INT | INT | -| BIGINT | BIGINT | -| FLOAT | FLOAT | -| DOUBLE | DOUBLE | -| DATE | STRING | -| DATETIME | STRING | -| DECIMAL | DECIMAL | -| CHAR | STRING | -| LARGEINT | STRING | -| VARCHAR | STRING | -| DECIMALV2 | DECIMAL | -| TIME | DOUBLE | -| HLL | Unsupported datatype | - -## 使用 Flink CDC 接入 Doris 示例(支持 Insert / Update / Delete 事件) -```sql -CREATE TABLE cdc_mysql_source ( - id int - ,name VARCHAR - ,PRIMARY KEY (id) NOT ENFORCED -) WITH ( - 'connector' = 'mysql-cdc', - 'hostname' = '127.0.0.1', - 'port' = '3306', - 'username' = 'root', - 'password' = 'password', - 'database-name' = 'database', - 'table-name' = 'table' -); - --- 支持删除事件同步(sink.enable-delete='true'),需要 Doris 表开启批量删除功能 -CREATE TABLE doris_sink ( -id INT, -name STRING -) -WITH ( - 'connector' = 'doris', - 'fenodes' = '127.0.0.1:8030', - 'table.identifier' = 'database.table', - 'username' = 'root', - 'password' = '', - 'sink.properties.format' = 'json', - 'sink.properties.strip_outer_array' = 'true', - 'sink.enable-delete' = 'true' -); - -insert into doris_sink select id,name from cdc_mysql_source; -``` diff --git a/docs/zh-CN/extending-doris/hive-bitmap-udf.md b/docs/zh-CN/extending-doris/hive-bitmap-udf.md deleted file mode 100644 index f3ecbf154a..0000000000 --- a/docs/zh-CN/extending-doris/hive-bitmap-udf.md +++ /dev/null @@ -1,104 +0,0 @@ ---- -{ - "title": "Hive Bitmap UDF", - "language": "zh-CN" -} ---- - - - -# Hive UDF - - Hive Bitmap UDF 提供了在 hive 表中生成 bitmap 、bitmap 运算等 UDF,Hive 中的 bitmap 与 Doris bitmap 完全一致 ,Hive 中的 bitmap 可以通过 spark bitmap load 导入 doris - - 主要目的: - 1. 减少数据导入 doris 时间 , 除去了构建字典、bitmap 预聚合等流程; - 2. 节省 hive 存储 ,使用 bitmap 对数据压缩 ,减少了存储成本; - 3. 
提供在 hive 中 bitmap 的灵活运算 ,比如:交集、并集、差集运算 ,计算后的 bitmap 也可以直接导入 doris; - -## 使用方法 - -### 在 Hive 中创建 Bitmap 类型表 - -```sql - --- 例子:创建 Hive Bitmap 表 -CREATE TABLE IF NOT EXISTS `hive_bitmap_table`( - `k1` int COMMENT '', - `k2` String COMMENT '', - `k3` String COMMENT '', - `uuid` binary COMMENT 'bitmap' -) comment 'comment' - --- 例子:创建普通 Hive 表 -CREATE TABLE IF NOT EXISTS `hive_table`( - `k1` int COMMENT '', - `k2` String COMMENT '', - `k3` String COMMENT '', - `uuid` int COMMENT '' -) comment 'comment' -``` - -### Hive Bitmap UDF 使用: - - Hive Bitmap UDF 需要在 Hive/Spark 中使用 - -```sql - --- 加载hive bitmap udf jar包 (需要将编译好的 hive-udf jar 包上传至 HDFS) -add jar hdfs://node:9001/hive-udf-jar-with-dependencies.jar; - --- 创建UDAF函数 -create temporary function to_bitmap as 'org.apache.doris.udf.ToBitmapUDAF'; -create temporary function bitmap_union as 'org.apache.doris.udf.BitmapUnionUDAF'; - --- 创建UDF函数 -create temporary function bitmap_count as 'org.apache.doris.udf.BitmapCountUDF'; -create temporary function bitmap_and as 'org.apache.doris.udf.BitmapAndUDF'; -create temporary function bitmap_or as 'org.apache.doris.udf.BitmapOrUDF'; -create temporary function bitmap_xor as 'org.apache.doris.udf.BitmapXorUDF'; - --- 例子:通过 to_bitmap 生成 bitmap 写入 Hive Bitmap 表 -insert into hive_bitmap_table -select - k1, - k2, - k3, - to_bitmap(uuid) as uuid -from - hive_table -group by - k1, - k2, - k3 - --- 例子:bitmap_count 计算 bitmap 中元素个数 -select k1,k2,k3,bitmap_count(uuid) from hive_bitmap_table - --- 例子:bitmap_union 用于计算分组后的 bitmap 并集 -select k1,bitmap_union(uuid) from hive_bitmap_table group by k1 - -``` - -### Hive Bitmap UDF 说明 - -## Hive bitmap 导入 doris - - 详见: 数据导入 -> Spark Load -> 基本操作 -> 创建导入 (示例3:上游数据源是hive binary类型情况) diff --git a/docs/zh-CN/extending-doris/hive-of-doris.md b/docs/zh-CN/extending-doris/hive-of-doris.md deleted file mode 100644 index e6371eb6c8..0000000000 --- a/docs/zh-CN/extending-doris/hive-of-doris.md +++ /dev/null @@ -1,117 +0,0 @@ ---- -{ - "title": "Doris On Hive", - "language": "zh-CN" -} ---- - - - -# Hive External Table of Doris - -Hive External Table of Doris 提供了 Doris 直接访问 Hive 外部表的能力,外部表省去了繁琐的数据导入工作,并借助 Doris 本身的 OLAP 的能力来解决 Hive 表的数据分析问题: - - 1. 支持 Hive 数据源接入Doris - 2. 支持 Doris 与 Hive 数据源中的表联合查询,进行更加复杂的分析操作 - -本文档主要介绍该功能的使用方式和注意事项等。 - -## 名词解释 - -### Doris 相关 - -* FE:Frontend,Doris 的前端节点,负责元数据管理和请求接入 -* BE:Backend,Doris 的后端节点,负责查询执行和数据存储 - -## 使用方法 - -### Doris 中创建 Hive 的外表 - -```sql --- 语法 -CREATE [EXTERNAL] TABLE table_name ( - col_name col_type [NULL | NOT NULL] [COMMENT "comment"] -) ENGINE=HIVE -[COMMENT "comment"] -PROPERTIES ( - 'property_name'='property_value', - ... 
-); - --- 例子:创建 Hive 集群中 hive_db 下的 hive_table 表 -CREATE TABLE `t_hive` ( - `k1` int NOT NULL COMMENT "", - `k2` char(10) NOT NULL COMMENT "", - `k3` datetime NOT NULL COMMENT "", - `k5` varchar(20) NOT NULL COMMENT "", - `k6` double NOT NULL COMMENT "" -) ENGINE=HIVE -COMMENT "HIVE" -PROPERTIES ( -'hive.metastore.uris' = 'thrift://192.168.0.1:9083', -'database' = 'hive_db', -'table' = 'hive_table' -); -``` - -#### 参数说明: - -- 外表列 - - 列名要于 Hive 表一一对应 - - 列的顺序需要与 Hive 表一致 - - 必须包含 Hive 表中的全部列 - - Hive 表分区列无需指定,与普通列一样定义即可。 -- ENGINE 需要指定为 HIVE -- PROPERTIES 属性: - - `hive.metastore.uris`:Hive Metastore 服务地址 - - `database`:挂载 Hive 对应的数据库名 - - `table`:挂载 Hive 对应的表名 - -## 类型匹配 - -支持的 Hive 列类型与 Doris 对应关系如下表: - -| Hive | Doris | 描述 | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| CHAR | CHAR | 当前仅支持UTF8编码 | -| VARCHAR | VARCHAR | 当前仅支持UTF8编码 | -| TINYINT | TINYINT | | -| SMALLINT | SMALLINT | | -| INT | INT | | -| BIGINT | BIGINT | | -| FLOAT | FLOAT | | -| DOUBLE | DOUBLE | | -| DECIMAL | DECIMAL | | -| DATE | DATE | | -| TIMESTAMP | DATETIME | Timestamp 转成 Datetime 会损失精度 | - -**注意:** -- Hive 表 Schema 变更**不会自动同步**,需要在 Doris 中重建 Hive 外表。 -- 当前 Hive 的存储格式仅支持 Text,Parquet 和 ORC 类型 -- 当前默认支持的 Hive 版本为 `2.3.7、3.1.2`,未在其他版本进行测试。后续后支持更多版本。 - -### 查询用法 - -完成在 Doris 中建立 Hive 外表后,除了无法使用 Doris 中的数据模型(rollup、预聚合、物化视图等)外,与普通的 Doris OLAP 表并无区别 - -```sql -select * from t_hive where k1 > 1000 and k3 ='term' or k4 like '%doris'; -``` diff --git a/docs/zh-CN/extending-doris/iceberg-of-doris.md b/docs/zh-CN/extending-doris/iceberg-of-doris.md deleted file mode 100644 index 8fd873add9..0000000000 --- a/docs/zh-CN/extending-doris/iceberg-of-doris.md +++ /dev/null @@ -1,210 +0,0 @@ ---- -{ - "title": "Doris On Iceberg", - "language": "zh-CN" -} ---- - - - -# Iceberg External Table of Doris - -Iceberg External Table of Doris 提供了 Doris 直接访问 Iceberg 外部表的能力,外部表省去了繁琐的数据导入工作,并借助 Doris 本身的 OLAP 的能力来解决 Iceberg 表的数据分析问题: - - 1. 支持 Iceberg 数据源接入Doris - 2. 支持 Doris 与 Iceberg 数据源中的表联合查询,进行更加复杂的分析操作 - -本文档主要介绍该功能的使用方式和注意事项等。 - -## 名词解释 - -### Doris 相关 - -* FE:Frontend,Doris 的前端节点,负责元数据管理和请求接入 -* BE:Backend,Doris 的后端节点,负责查询执行和数据存储 - -## 使用方法 - -### Doris 中创建 Iceberg 的外表 - -可以通过以下两种方式在 Doris 中创建 Iceberg 外表。建外表时无需声明表的列定义,Doris 可以根据 Iceberg 中表的列定义自动转换。 - -1. 创建一个单独的外表,用于挂载 Iceberg 表。 - 具体相关语法,可以通过 `HELP CREATE TABLE` 查看。 - - ```sql - -- 语法 - CREATE [EXTERNAL] TABLE table_name - ENGINE = ICEBERG - [COMMENT "comment"] - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.table" = "icberg_table_name", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - - - -- 例子:挂载 Iceberg 中 iceberg_db 下的 iceberg_table - CREATE TABLE `t_iceberg` - ENGINE = ICEBERG - PROPERTIES ( - "iceberg.database" = "iceberg_db", - "iceberg.table" = "iceberg_table", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - -2. 
创建一个 Iceberg 数据库,用于挂载远端对应 Iceberg 数据库,同时挂载该 database 下的所有 table。 - 具体相关语法,可以通过 `HELP CREATE DATABASE` 查看。 - - ```sql - -- 语法 - CREATE DATABASE db_name - [COMMENT "comment"] - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - - -- 例子:挂载 Iceberg 中的 iceberg_db,同时挂载该 db 下的所有 table - CREATE DATABASE `iceberg_test_db` - PROPERTIES ( - "iceberg.database" = "iceberg_db", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - - `iceberg_test_db` 中的建表进度可以通过 `HELP SHOW TABLE CREATION` 查看。 - -也可以根据自己的需求明确指定列定义来创建 Iceberg 外表。 - -1. 创一个 Iceberg 外表 - - ```sql - -- 语法 - CREATE [EXTERNAL] TABLE table_name ( - col_name col_type [NULL | NOT NULL] [COMMENT "comment"] - ) ENGINE = ICEBERG - [COMMENT "comment"] - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.table" = "icberg_table_name", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - - -- 例子:挂载 Iceberg 中 iceberg_db 下的 iceberg_table - CREATE TABLE `t_iceberg` ( - `id` int NOT NULL COMMENT "id number", - `name` varchar(10) NOT NULL COMMENT "user name" - ) ENGINE = ICEBERG - PROPERTIES ( - "iceberg.database" = "iceberg_db", - "iceberg.table" = "iceberg_table", - "iceberg.hive.metastore.uris" = "thrift://192.168.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - -#### 参数说明: - -- 外表列 - - 列名要于 Iceberg 表一一对应 - - 列的顺序需要与 Iceberg 表一致 -- ENGINE 需要指定为 ICEBERG -- PROPERTIES 属性: - - `iceberg.hive.metastore.uris`:Hive Metastore 服务地址 - - `iceberg.database`:挂载 Iceberg 对应的数据库名 - - `iceberg.table`:挂载 Iceberg 对应的表名,挂载 Iceberg database 时无需指定。 - - `iceberg.catalog.type`:Iceberg 中使用的 catalog 方式,默认为 `HIVE_CATALOG`,当前仅支持该方式,后续会支持更多的 Iceberg catalog 接入方式。 - -### 展示表结构 - -展示表结构可以通过 `HELP SHOW CREATE TABLE` 查看。 - -### 同步挂载 - -当 Iceberg 表 Schema 发生变更时,可以通过 `REFRESH` 命令手动同步,该命令会将 Doris 中的 Iceberg 外表删除重建,具体帮助可以通过 `HELP REFRESH` 查看。 - -```sql --- 同步 Iceberg 表 -REFRESH TABLE t_iceberg; - --- 同步 Iceberg 数据库 -REFRESH DATABASE iceberg_test_db; -``` - -## 类型匹配 - -支持的 Iceberg 列类型与 Doris 对应关系如下表: - -| Iceberg | Doris | 描述 | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| INTEGER | INT | | -| LONG | BIGINT | | -| FLOAT | FLOAT | | -| DOUBLE | DOUBLE | | -| DATE | DATE | | -| TIMESTAMP | DATETIME | Timestamp 转成 Datetime 会损失精度 | -| STRING | STRING | | -| UUID | VARCHAR | 使用 VARCHAR 来代替 | -| DECIMAL | DECIMAL | | -| TIME | - | 不支持 | -| FIXED | - | 不支持 | -| BINARY | - | 不支持 | -| STRUCT | - | 不支持 | -| LIST | - | 不支持 | -| MAP | - | 不支持 | - -**注意:** -- Iceberg 表 Schema 变更**不会自动同步**,需要在 Doris 中通过 `REFRESH` 命令同步 Iceberg 外表或数据库。 -- 当前默认支持的 Iceberg 版本为 0.12.0,未在其他版本进行测试。后续后支持更多版本。 - -### 查询用法 - -完成在 Doris 中建立 Iceberg 外表后,除了无法使用 Doris 中的数据模型(rollup、预聚合、物化视图等)外,与普通的 Doris OLAP 表并无区别 - -```sql -select * from t_iceberg where k1 > 1000 and k3 ='term' or k4 like '%doris'; -``` - -## 相关系统配置 - -### FE配置 - -下面几个配置属于 Iceberg 外表系统级别的配置,可以通过修改 `fe.conf` 来配置,也可以通过 `ADMIN SET CONFIG` 来配置。 - -- `iceberg_table_creation_strict_mode` - - 创建 Iceberg 表默认开启 strict mode。 - strict mode 是指对 Iceberg 表的列类型进行严格过滤,如果有 Doris 目前不支持的数据类型,则创建外表失败。 - -- `iceberg_table_creation_interval_second` - - 自动创建 Iceberg 表的后台任务执行间隔,默认为 10s。 - -- `max_iceberg_table_creation_record_size` - - Iceberg 表创建记录保留的最大值,默认为 2000. 
仅针对创建 Iceberg 数据库记录。 diff --git a/docs/zh-CN/extending-doris/logstash.md b/docs/zh-CN/extending-doris/logstash.md deleted file mode 100644 index c92bad0c65..0000000000 --- a/docs/zh-CN/extending-doris/logstash.md +++ /dev/null @@ -1,198 +0,0 @@ ---- -{ - "title": "Logstash Doris Output Plugin", - "language": "zh-CN" -} ---- - - - -# Doris output plugin - -该插件用于logstash输出数据到Doris,使用 HTTP 协议与 Doris FE Http接口交互,并通过 Doris 的 stream load 的方式进行数据导入. - -[了解Doris Stream Load ](http://doris.apache.org/zh-CN/administrator-guide/load-data/stream-load-manual.html) - -[了解更多关于Doris](http://doris.apache.org/zh-CN/) - - -## 安装和编译 -### 1.下载插件源码 - -### 2.编译 ## -在extension/logstash/ 目录下执行 - -`gem build logstash-output-doris.gemspec` - -你将在同目录下得到 logstash-output-doris-{version}.gem 文件 - -### 3.插件安装 -copy logstash-output-doris-{version}.gem 到 logstash 安装目录下 - -执行命令 - -`./bin/logstash-plugin install logstash-output-doris-{version}.gem` - -安装 logstash-output-doris 插件 - -## 配置 -### 示例: - -在config目录下新建一个配置配置文件,命名为 logstash-doris.conf - -具体配置如下: - - output { - doris { - http_hosts => [ "http://fehost:8030" ] - user => user_name - password => password - db => "db_name" - table => "table_name" - label_prefix => "label_prefix" - column_separator => "," - } - } - -配置说明: - -连接相关配置: - -配置 | 说明 ---- | --- -`http_hosts` | FE的HTTP交互地址 eg | ["http://fe1:8030", "http://fe2:8030"] -`user` | 用户名,该用户需要有doris对应库表的导入权限 -`password` | 密码 -`db` | 数据库名 -`table` | 表名 -`label_prefix` | 导入标识前缀,最终生成的标识为 *{label\_prefix}\_{db}\_{table}\_{time_stamp}* - - -导入相关配置:([参考文档](http://doris.apache.org/master/zh-CN/administrator-guide/load-data/stream-load-manual.html)) - -配置 | 说明 ---- | --- -`column_separator` | 列分割符,默认为\t。 -`columns` | 用于指定导入文件中的列和 table 中的列的对应关系。 -`where` | 导入任务指定的过滤条件。 -`max_filter_ratio` | 导入任务的最大容忍率,默认零容忍。 -`partition` | 待导入表的 Partition 信息。 -`timeout` | 超时时间,默认为600s。 -`strict_mode` | 严格模式,默认为false。 -`timezone` | 指定本次导入所使用的时区,默认为东八区。 -`exec_mem_limit` | 导入内存限制,默认为 2GB,单位为字节。 - -其他配置 - -配置 | 说明 ---- | --- -`save_on_failure` | 如果导入失败是否在本地保存,默认为true -`save_dir` | 本地保存目录,默认为 /tmp -`automatic_retries` | 失败时重试最大次数,默认为3 -`batch_size` | 每批次最多处理的event数量,默认为100000 -`idle_flush_time` | 最大间隔时间,默认为20(秒) - - -## 启动 -执行命令启动doris output plugin: - -`{logstash-home}/bin/logstash -f {logstash-home}/config/logstash-doris.conf --config.reload.automatic` - - - - -## 完整使用示例 -### 1.编译doris-output-plugin -1> 下载ruby压缩包,自行到[ruby官网](https://www.ruby-lang.org/en/downloads/)下载,这里使用的2.7.1版本 - -2> 编译安装,配置ruby的环境变量 - -3> 到doris源码 extension/logstash/ 目录下,执行 - -`gem build logstash-output-doris.gemspec` - -得到文件 logstash-output-doris-0.1.0.gem,至此编译完成 - -### 2.安装配置filebeat(此处使用filebeat作为input) - -1> [es官网](https://www.elastic.co/)下载 filebeat tar压缩包并解压 - -2> 进入filebeat目录下,修改配置文件 filebeat.yml 如下: - - filebeat.inputs: - - type: log - paths: - - /tmp/doris.data - output.logstash: - hosts: ["localhost:5044"] - -/tmp/doris.data 为doris数据路径 - -3> 启动filebeat: - -`./filebeat -e -c filebeat.yml -d "publish"` - - -### 3.安装logstash及doris-out-plugin -1> [es官网](https://www.elastic.co/)下载 logstash tar压缩包并解压 - -2> 将步骤1中得到的 logstash-output-doris-0.1.0.gem copy到logstash安装目录下 - -3> 执行 - -`./bin/logstash-plugin install logstash-output-doris-0.1.0.gem` - -安装插件 - -4> 在config 目录下新建配置文件 logstash-doris.conf 内容如下: - - input { - beats { - port => "5044" - } - } - - output { - doris { - http_hosts => [ "http://127.0.0.1:8030" ] - user => doris - password => doris - db => "logstash_output_test" - table => "output" - label_prefix => "doris" - column_separator => "," - columns => "a,b,c,d,e" - } 
- } - -这里的配置需按照配置说明自行配置 - -5> 启动logstash: - -./bin/logstash -f ./config/logstash-doris.conf --config.reload.automatic - -### 4.测试功能 - -向/tmp/doris.data追加写入数据 - -`echo a,b,c,d,e >> /tmp/doris.data` - -观察logstash日志,若返回response的Status为 Success,则导入成功,此时可在 logstash_output_test.output 表中查看已导入的数据 - diff --git a/docs/zh-CN/extending-doris/odbc-of-doris.md b/docs/zh-CN/extending-doris/odbc-of-doris.md deleted file mode 100644 index 951f9d8f91..0000000000 --- a/docs/zh-CN/extending-doris/odbc-of-doris.md +++ /dev/null @@ -1,361 +0,0 @@ ---- -{ - "title": "Doris On ODBC", - "language": "zh-CN" -} ---- - - - -# ODBC External Table Of Doris - -ODBC External Table Of Doris 提供了Doris通过数据库访问的标准接口(ODBC)来访问外部表,外部表省去了繁琐的数据导入工作,让Doris可以具有了访问各式数据库的能力,并借助Doris本身的OLAP的能力来解决外部表的数据分析问题: - - 1. 支持各种数据源接入Doris - 2. 支持Doris与各种数据源中的表联合查询,进行更加复杂的分析操作 - 3. 通过insert into将Doris执行的查询结果写入外部的数据源 - -本文档主要介绍该功能的实现原理、使用方式等。 - -## 名词解释 - -### Doris相关 -* FE:Frontend,Doris 的前端节点,负责元数据管理和请求接入 -* BE:Backend,Doris 的后端节点,负责查询执行和数据存储 - -## 使用方法 - -### Doris中创建ODBC的外表 - -#### 1. 不使用Resource创建ODBC的外表 - -``` -CREATE EXTERNAL TABLE `baseall_oracle` ( - `k1` decimal(9, 3) NOT NULL COMMENT "", - `k2` char(10) NOT NULL COMMENT "", - `k3` datetime NOT NULL COMMENT "", - `k5` varchar(20) NOT NULL COMMENT "", - `k6` double NOT NULL COMMENT "" -) ENGINE=ODBC -COMMENT "ODBC" -PROPERTIES ( -"host" = "192.168.0.1", -"port" = "8086", -"user" = "test", -"password" = "test", -"database" = "test", -"table" = "baseall", -"driver" = "Oracle 19 ODBC driver", -"odbc_type" = "oracle" -); -``` - -#### 2. 通过ODBC_Resource来创建ODBC外表 (推荐使用的方式) -``` -CREATE EXTERNAL RESOURCE `oracle_odbc` -PROPERTIES ( -"type" = "odbc_catalog", -"host" = "192.168.0.1", -"port" = "8086", -"user" = "test", -"password" = "test", -"database" = "test", -"odbc_type" = "oracle", -"driver" = "Oracle 19 ODBC driver" -); - -CREATE EXTERNAL TABLE `baseall_oracle` ( - `k1` decimal(9, 3) NOT NULL COMMENT "", - `k2` char(10) NOT NULL COMMENT "", - `k3` datetime NOT NULL COMMENT "", - `k5` varchar(20) NOT NULL COMMENT "", - `k6` double NOT NULL COMMENT "" -) ENGINE=ODBC -COMMENT "ODBC" -PROPERTIES ( -"odbc_catalog_resource" = "oracle_odbc", -"database" = "test", -"table" = "baseall" -); -``` -参数说明: - -参数 | 说明 ----|--- -**hosts** | 外表数据库的IP地址 -**driver** | ODBC外表的Driver名,该名字需要和be/conf/odbcinst.ini中的Driver名一致。 -**odbc_type** | 外表数据库的类型,当前支持oracle, mysql, postgresql -**user** | 外表数据库的用户名 -**password** | 对应用户的密码信息 - - - -##### ODBC Driver的安装和配置 - -各大主流数据库都会提供ODBC的访问Driver,用户可以执行参照参照各数据库官方推荐的方式安装对应的ODBC Driver LiB库。 - - -安装完成之后,查找对应的数据库的Driver Lib库的路径,并且修改be/conf/odbcinst.ini的配置: -``` -[MySQL Driver] -Description = ODBC for MySQL -Driver = /usr/lib64/libmyodbc8w.so -FileUsage = 1 -``` -* 上述配置`[]`里的对应的是Driver名,在建立外部表时需要保持外部表的Driver名和配置文件之中的一致。 -* `Driver=` 这个要根据实际BE安装Driver的路径来填写,本质上就是一个动态库的路径,这里需要保证该动态库的前置依赖都被满足。 - -**切记,这里要求所有的BE节点都安装上相同的Driver,并且安装路径相同,同时有相同的be/conf/odbcinst.ini的配置。** - - -### 查询用法 - -完成在Doris中建立ODBC外表后,除了无法使用Doris中的数据模型(rollup、预聚合、物化视图等)外,与普通的Doris表并无区别 - - -``` -select * from oracle_table where k1 > 1000 and k3 ='term' or k4 like '%doris'; -``` - -### 数据写入 - -在Doris中建立ODBC外表后,可以通过insert into语句直接写入数据,也可以将Doris执行完查询之后的结果写入ODBC外表,或者是从一个ODBC外表将数据导入另一个ODBC外表。 - - -``` -insert into oracle_table values(1, "doris"); -insert into oracle_table select * from postgre_table; -``` -#### 事务 - -Doris的数据是由一组batch的方式写入外部表的,如果中途导入中断,之前写入数据可能需要回滚。所以ODBC外表支持数据写入时的事务,事务的支持需要通过session variable:`enable_odbc_transcation `设置。 - -``` -set enable_odbc_transcation = true; -``` - 
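结合上面的开关,一个示意性的完整写入流程如下(沿用上文示例中的 `oracle_table` 与 `postgre_table` 外表名,仅作演示,实际使用时请替换为自己的外表):

```
-- 在当前会话中开启 ODBC 外表写入事务
set enable_odbc_transcation = true;

-- 将 Doris 查询(或另一张 ODBC 外表)的结果批量写入 ODBC 外表
insert into oracle_table select * from postgre_table;
```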
-事务保证了ODBC外表数据写入的原子性,但是一定程度上会降低数据写入的性能,可以考虑酌情开启该功能。 - -## 数据库ODBC版本对应关系 - -### Centos操作系统 - -使用的unixODBC版本是:2.3.1,Doris 0.15,centos 7.9,全部使用yum方式安装。 - -#### 1.mysql - -| Mysql版本 | Mysql ODBC版本 | -| --------- | -------------- | -| 8.0.27 | 8.0.27,8.026 | -| 5.7.36 | 5.3.11,5.3.13 | -| 5.6.51 | 5.3.11,5.3.13 | -| 5.5.62 | 5.3.11,5.3.13 | - -#### 2.PostgreSQL - -PostgreSQL的yum 源 rpm包地址: - -``` -https://download.postgresql.org/pub/repos/yum/reporpms/EL-7-x86_64/pgdg-redhat-repo-latest.noarch.rpm -``` - -这里面包含PostgreSQL从9.x 到 14.x的全部版本,包括对应的ODBC版本,可以根据需要选择安装。 - -| PostgreSQL版本 | PostgreSQL ODBC版本 | -| -------------- | ---------------------------- | -| 12.9 | postgresql12-odbc-13.02.0000 | -| 13.5 | postgresql13-odbc-13.02.0000 | -| 14.1 | postgresql14-odbc-13.02.0000 | -| 9.6.24 | postgresql96-odbc-13.02.0000 | -| 10.6 | postgresql10-odbc-13.02.0000 | -| 11.6 | postgresql11-odbc-13.02.0000 | - -#### 3.Oracle - -| Oracle版本 | Oracle ODBC版本 | -| ------------------------------------------------------------ | ------------------------------------------ | -| Oracle Database 11g Enterprise Edition Release 11.2.0.1.0 - 64bit Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | -| Oracle Database 12c Standard Edition Release 12.2.0.1.0 - 64bit Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | -| Oracle Database 18c Enterprise Edition Release 18.0.0.0.0 - Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | -| Oracle Database 19c Enterprise Edition Release 19.0.0.0.0 - Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | -| Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production | oracle-instantclient19.13-odbc-19.13.0.0.0 | - -Oracle ODBC驱动版本下载地址: - -``` -https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-sqlplus-19.13.0.0.0-2.x86_64.rpm -https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-devel-19.13.0.0.0-2.x86_64.rpm -https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-odbc-19.13.0.0.0-2.x86_64.rpm -https://download.oracle.com/otn_software/linux/instantclient/1913000/oracle-instantclient19.13-basic-19.13.0.0.0-2.x86_64.rpm -``` - -### Ubuntu操作系统 - -使用的unixODBC版本是:2.3.4,Doris 0.15,Ubuntu 20.04 - -#### 1.Mysql - -| Mysql版本 | Mysql ODBC版本 | -| --------- | -------------- | -| 8.0.27 | 8.0.11,5.3.13 | - -目前只测试了这一个版本其他版本测试后补充 - -#### 2.PostgreSQL - -| PostgreSQL版本 | PostgreSQL ODBC版本 | -| -------------- | ------------------- | -| 12.9 | psqlodbc-12.02.0000 | - -其他版本只要下载和数据库大版本相符合的ODBC驱动版本,问题不大,这块后续会持续补充其他版本在Ubuntu系统下的测试结果。 - -#### 3.Oracle - -同上Centos操作系统的Oracle数据库及ODBC对应关系,在ubuntu下安装rpm软件包使用下面方式。 - -为了在ubuntu下可以进行安装rpm包,我们还需要安装一个alien,这是一个可以将rpm包转换成deb安装包的工具 - -``` -sudo apt-get install alien -``` - -然后执行安装上面四个包 - -``` -sudo alien -i oracle-instantclient19.13-basic-19.13.0.0.0-2.x86_64.rpm -sudo alien -i oracle-instantclient19.13-devel-19.13.0.0.0-2.x86_64.rpm -sudo alien -i oracle-instantclient19.13-odbc-19.13.0.0.0-2.x86_64.rpm -sudo alien -i oracle-instantclient19.13-sqlplus-19.13.0.0.0-2.x86_64.rpm -``` - - -## 类型匹配 - -各个数据库之间数据类型存在不同,这里列出了各个数据库中的类型和Doris之中数据类型匹配的情况。 - -### MySQL - -| MySQL | Doris | 替换方案 | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| CHAR | CHAR | 当前仅支持UTF8编码 | -| VARCHAR | VARCHAR | 当前仅支持UTF8编码 | -| DATE | DATE | | -| FLOAT | FLOAT | | -| TINYINT | TINYINT | | -| SMALLINT | SMALLINT | | -| INT | INT | | -| BIGINT | BIGINT | | -| DOUBLE | DOUBLE | | -| DATETIME | DATETIME | | -| 
DECIMAL | DECIMAL | | - -### PostgreSQL - -| PostgreSQL | Doris | 替换方案 | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| CHAR | CHAR | 当前仅支持UTF8编码 | -| VARCHAR | VARCHAR | 当前仅支持UTF8编码 | -| DATE | DATE | | -| REAL | FLOAT | | -| SMALLINT | SMALLINT | | -| INT | INT | | -| BIGINT | BIGINT | | -| DOUBLE | DOUBLE | | -| TIMESTAMP | DATETIME | | -| DECIMAL | DECIMAL | | - -### Oracle - -| Oracle | Doris | 替换方案 | -| :------: | :----: | :-------------------------------: | -| 不支持 | BOOLEAN | Oracle可用number(1) 替换boolean | -| CHAR | CHAR | | -| VARCHAR | VARCHAR | | -| DATE | DATE | | -| FLOAT | FLOAT | | -| 无 | TINYINT | Oracle可由NUMMBER替换 | -| SMALLINT | SMALLINT | | -| INT | INT | | -| 无 | BIGINT | Oracle可由NUMMBER替换 | -| 无 | DOUBLE | Oracle可由NUMMBER替换 | -| DATETIME | DATETIME | | -| NUMBER | DECIMAL | | - -### SQLServer - -| SQLServer | Doris | 替换方案 | -| :------: | :----: | :-------------------------------: | -| BOOLEAN | BOOLEAN | | -| CHAR | CHAR | 当前仅支持UTF8编码 | -| VARCHAR | VARCHAR | 当前仅支持UTF8编码 | -| DATE | DATE | | -| REAL | FLOAT | | -| TINYINT | TINYINT | | -| SMALLINT | SMALLINT | | -| INT | INT | | -| BIGINT | BIGINT | | -| FLOAT | DOUBLE | | -| DATETIME/DATETIME2 | DATETIME | | -| DECIMAL/NUMERIC | DECIMAL | | - -## Q&A - -1. 与原先的MySQL外表的关系 - - 在接入ODBC外表之后,原先的访问MySQL外表的方式将被逐渐弃用。如果之前没有使用过MySQL外表,建议新接入的MySQL表直接使用ODBC的MySQL外表。 - -2. 除了MySQL,Oracle,PostgreSQL,SQLServer是否能够支持更多的数据库 - - 目前Doris只适配了MySQL,Oracle,PostgreSQL,SQLServer,关于其他的数据库的适配工作正在规划之中,原则上来说任何支持ODBC访问的数据库都能通过ODBC外表来访问。如果您有访问其他外表的需求,欢迎修改代码并贡献给Doris。 - -3. 什么场合适合通过外表访问 - - 通常在外表数据量较小,少于100W条时,可以通过外部表的方式访问。由于外表无法发挥Doris在存储引擎部分的能力和会带来额外的网络开销,所以建议根据实际对查询的访问时延要求来确定是否通过外部表访问还是将数据导入Doris之中。 - -4. 通过Oracle访问出现乱码 - - 尝试在BE启动脚本之中添加如下参数:`export NLS_LANG=AMERICAN_AMERICA.AL32UTF8`, 并重新启动所有BE - -5. ANSI Driver or Unicode Driver ? - - 当前ODBC支持ANSI 与 Unicode 两种Driver形式,当前Doris只支持Unicode Driver。如果强行使用ANSI Driver可能会导致查询结果出错。 - -6. 报错 `driver connect Err: 01000 [unixODBC][Driver Manager]Can't open lib 'Xxx' : file not found (0)` - - 没有在每一个BE上安装好对应数据的Driver,或者是没有在be/conf/odbcinst.ini配置正确的路径,亦或是建表是Driver名与be/conf/odbcinst.ini不同 - -7. 报错 `Fail to convert odbc value 'PALO ' TO INT on column:'A'` - - ODBC外表的A列类型转换出错,说明外表的实际列与ODBC的映射列的数据类型不同,需要修改列的类型映射 - -8. 同时使用旧的MySQL表与ODBC外表的Driver时出现程序Crash - - 这个是MySQL数据库的Driver与现有Doris依赖MySQL外表的兼容问题。推荐解决的方式如下: - * 方式1:通过ODBC外表替换旧的MySQL外表,并重新编译BE,关闭WITH_MYSQL的选项 - * 方式2:不使用最新8.X的MySQL的ODBC Driver,而是使用5.X的MySQL的ODBC Driver - -9. 过滤条件下推 - 当前ODBC外表支持过滤条件下推,目前MySQL的外表是能够支持所有条件下推的。其他的数据库的函数与Doris不同会导致下推查询失败。目前除MySQL外表之外,其他的数据库不支持函数调用的条件下推。Doris是否将所需过滤条件下推,可以通过`explain` 查询语句进行确认。 - -10. 
报错`driver connect Err: xxx` - - 通常是连接数据库失败,Err部分代表了不同的数据库连接失败的报错。这种情况通常是配置存在问题。可以检查是否错配了ip地址,端口或账号密码。 diff --git a/docs/zh-CN/extending-doris/plugin-development-manual.md b/docs/zh-CN/extending-doris/plugin-development-manual.md deleted file mode 100644 index 49ff7b248d..0000000000 --- a/docs/zh-CN/extending-doris/plugin-development-manual.md +++ /dev/null @@ -1,312 +0,0 @@ ---- -{ - "title": "插件开发手册", - "language": "zh-CN" -} ---- - - - -# Doris 插件框架 - -## 介绍 - -Doris 的插件框架支持在运行时添加/卸载自定义插件,而不需要重启服务,用户可以通过开发自己的插件来扩展Doris的功能。 - -例如,审计插件作用于 Doris 请求执行后,可以获取到一次请求相关的信息(访问用户,请求IP,SQL等...),并将信息写入到指定的表中。 - -与UDF的区别: -* UDF是函数,用于在SQL执行时进行数据计算。插件是附加功能,用于为Doris扩展自定义的功能,例如:支持不同的存储引擎,支持不同的导入方式,插件并不会参与执行SQL时的数据计算。 -* UDF的执行周期仅限于一次SQL执行。插件的执行周期可能与Doris进程相同。 -* 使用场景不同。如果您需要执行SQL时支持特殊的数据算法,那么推荐使用UDF,如果您需要在Doris上运行自定义的功能,或者是启动一个后台线程执行任务,那么推荐使用插件。 - -目前插件框架仅支持审计类插件。 - -> 注意: -> Doris的插件框架是实验性功能, 目前只支持FE插件,且默认是关闭的,可以通过FE配置`plugin_enable=true`打开 - -## 插件 - -一个FE的插件可以使一个**zip压缩包**或者是一个**目录**。其内容至少包含两个文件:`plugin.properties` 和 `.jar` 文件。`plugin.properties`用于描述插件信息。 - -文件结构如下: - -``` -# plugin .zip -auditodemo.zip: - -plugin.properties - -auditdemo.jar - -xxx.config - -data/ - -test_data/ - -# plugin local directory -auditodemo/: - -plugin.properties - -auditdemo.jar - -xxx.config - -data/ - -test_data/ -``` - -`plugin.properties` 内容示例: - -``` -### required: -# -# the plugin name -name = audit_plugin_demo -# -# the plugin type -type = AUDIT -# -# simple summary of the plugin -description = just for test -# -# Doris's version, like: 0.11.0 -version = 0.11.0 - -### FE-Plugin optional: -# -# version of java the code is built against -# use the command "java -version" value, like 1.8.0, 9.0.1, 13.0.4 -java.version = 1.8.31 -# -# the name of the class to load, fully-qualified. -classname = AuditPluginDemo - -### BE-Plugin optional: -# the name of the so to load -soName = example.so -``` - -## 编写插件 - -插件的开发环境依赖Doris的开发编译环境。所以请先确保Doris的开发编译环境运行正常。 - -`fe_plugins` 目录是 FE 插件的根模块。这个根模块统一管理插件所需的依赖。添加一个新的插件,相当于在这个根模块添加一个子模块。 - -### 创建插件模块 - -我们可以通过以下命令在 `fe_plugins` 目录创建一个子模块用户实现创建和创建工程。其中 `doris-fe-test` 为插件名称。 - -``` -mvn archetype: generate -DarchetypeCatalog = internal -DgroupId = org.apache -DartifactId = doris-fe-test -DinteractiveMode = false -``` - -这个命令会创建一个新的 maven 工程,并且自动向 `fe_plugins/pom.xml` 中添加一个子模块: - -``` -    ..... -    org.apache -    doris-fe-plugins -    pom -    1.0-SNAPSHOT -     -        auditdemo -        # new plugin module -        doris-fe-test -     -    ..... 
-``` - -新的工程目录结构如下: - -``` --doris-fe-test/ --pom.xml --src/ - ---- main/java/org/apache/ - ------- App.java # mvn auto generate, ignore - ---- test/java/org/apache -``` - -接下来我们在 `main` 目录下添加一个 `assembly` 目录来存放 `plugin.properties` 和 `zip.xml`。最终的工程目录结构如下: - -``` --doris-fe-test/ --pom.xml --src/ ----- main/ ------- assembly/ --------- plugin.properties --------- zip.xml ------- java/org/apache/ ---------App.java # mvn auto generate, ignore ----- test/java/org/apache -``` - -### 添加 zip.xml - -`zip.xml` 用于描述最终生成的 zip 压缩包中的文件内容。(如 .jar file, plugin.properties 等等) - -``` - -    plugin -     -        zip -     -     -    false -     -         -            target -             -                *.jar -             -            / -         - -         -            src/main/assembly -             -                plugin.properties -             -            / -         -     - -``` - -### 更新 pom.xml - -接下来我们需要更新子模块的 `pom.xml` 文件,添加 doris-fe 依赖: - -``` - - - - org.apache - doris-fe-plugins - 1.0-SNAPSHOT - - 4.0.0 - - auditloader - jar - - - - - org.apache - doris-fe - - - - - ... - - - - - auditloader - - - maven-assembly-plugin - 2.4.1 - - false - - src/main/assembly/zip.xml - - - - - make-assembly - package - - single - - - - - - - - -``` - -### 实现插件 - -之后我们就可以开始进行插件功能的开发了。插件需要实现 `Plugin` 接口。具体可以参阅 Doris 自带的 `auditdemo` 插件示例代码。 - -### 编译 - -在编译插件之前,需要先执行 `sh build.sh --fe` 进行 Doris FE 代码的编译,并确保编译成功。 - -之后,执行 `sh build_plugin.sh` 编译所有插件。最终的产出会存放在 `fe_plugins/output` 目录中。 - -或者也可以执行 `sh build_plugin.sh --plugin your_plugin_name` 来仅编译指定的插件。 - -### 另一种开发方式 - -您可以直接通过修改自带的 `auditdemo` 插件示例代码进行开发。 - -## 部署 - -插件可以通过以下三种方式部署。 - -* 将 `.zip` 文件放在 Http 或 Https 服务器上。如:`http://xxx.xxx.com/data/my_plugin.zip`, Doris 会下载这个文件。同时需要在properties中设置md5sum的值,或者放置一个和 `.zip` 文件同名的 md5 文件,如 `http://xxx.xxxxxx.com/data/my_plugin.zip.md5`。其中内容为 .zip 文件的 MD5 值。 -* 本地 `.zip` 文件。 如:`/home/work/data/plugin.zip`。如果该插件仅用于 FE,则需部署在所有 FE 节点相同的目录下。否则,需要在所有 FE 和 BE 节点部署。 -* 本地目录。如:`/home/work/data/plugin/`。相当于 `.zip` 文件解压后的目录。如果该插件仅用于 FE,则需部署在所有 FE 节点相同的目录下。否则,需要在所有 FE 和 BE 节点部署。 - -注意:需保证部署路径在整个插件生命周期内有效。 - -## 安装和卸载插件 - -通过如下命令安装和卸载插件。更多帮助请参阅 `HELP INSTALL PLUGIN;` `HELP IUNNSTALL PLUGIN;` `HELP SHOW PLUGINS;` - -``` -mysql> install plugin from "/home/users/doris/auditloader.zip"; -Query OK, 0 rows affected (0.09 sec) - -mysql> show plugins\G -*************************** 1. row *************************** - Name: auditloader - Type: AUDIT -Description: load audit log to olap load, and user can view the statistic of queries - Version: 0.12.0 -JavaVersion: 1.8.31 - ClassName: AuditLoaderPlugin - SoName: NULL - Sources: /home/users/doris/auditloader.zip - Status: INSTALLED - Properties: {} -*************************** 2. 
row *************************** - Name: AuditLogBuilder - Type: AUDIT -Description: builtin audit logger - Version: 0.12.0 -JavaVersion: 1.8.31 - ClassName: org.apache.doris.qe.AuditLogBuilder - SoName: NULL - Sources: Builtin - Status: INSTALLED - Properties: {} -2 rows in set (0.00 sec) - -mysql> uninstall plugin auditloader; -Query OK, 0 rows affected (0.05 sec) - -mysql> show plugins; -Empty set (0.00 sec) -``` diff --git a/docs/zh-CN/extending-doris/seatunnel/flink-sink.md b/docs/zh-CN/extending-doris/seatunnel/flink-sink.md deleted file mode 100644 index e558809cec..0000000000 --- a/docs/zh-CN/extending-doris/seatunnel/flink-sink.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -{ - "title": "Seatunnel Connector Flink Doris", - "language": "zh-CN" -} ---- - - - -# Seatunnel -最新版本的 [Apache SeaTunnel (原 waterdrop )](https://seatunnel.apache.org/zh-CN/) 已经支持 Doris 的连接器, SeaTunnel 可以用过 Spark 引擎和 Flink 引擎同步数据至 Doris 中. - -## Flink Sink Doris(2.x) -Seatunnel Flink Sink Doris [插件代码](https://github.com/apache/incubator-seatunnel/tree/dev/seatunnel-connectors/seatunnel-connector-flink-doris) -### 参数列表 -| 配置项 | 类型 | 必填 | 默认值 | 支持引擎 | -| --- | --- | --- | --- | --- | -| fenodes | string | yes | - | Flink | -| database | string | yes | - | Flink | -| table | string | yes | - | Flink | -| user | string | yes | - | Flink | -| password | string | yes | - | Flink | -| batch_size | int | no | 100 | Flink | -| interval | int | no |1000 | Flink | -| max_retries | int | no | 1 | Flink| -| doris.* | - | no | - | Flink | - -`fenodes [string]` - -Doris Fe Http访问地址, eg: 127.0.01:8030 - -`database [string]` - -写入 Doris 的库名 - -`table [string]` - -写入 Doris 的表名 - -`user [string]` - -Doris 访问用户 - -`password [string]` - -Doris 访问用户密码 - -`batch_size [int]` - -单次写Doris的最大行数,默认值100 - -`interval [int]` - -flush 间隔时间(毫秒),超过该时间后异步线程将 缓存中数据写入Doris。设置为0表示关闭定期写入。 - -`max_retries [int]` - -写Doris失败之后的重试次数 - -`doris.* [string]` - -Stream load 的导入参数。例如:'doris.column_separator' = ', '等 - -[更多 Stream Load 参数配置](https://doris.apache.org/zh-CN/administrator-guide/load-data/stream-load-manual.html) - -### Examples -Socket 数据写入 Doris -``` -env { - execution.parallelism = 1 -} -source { - SocketStream { - host = 127.0.0.1 - port = 9999 - result_table_name = "socket" - field_name = "info" - } -} -transform { -} -sink { - DorisSink { - fenodes = "127.0.0.1:8030" - user = root - password = 123456 - database = test - table = test_tbl - batch_size = 5 - max_retries = 1 - interval = 5000 - } -} - -``` -### 启动命令 -``` -sh bin/start-seatunnel-flink.sh --config config/flink.streaming.conf -``` \ No newline at end of file diff --git a/docs/zh-CN/extending-doris/seatunnel/spark-sink.md b/docs/zh-CN/extending-doris/seatunnel/spark-sink.md deleted file mode 100644 index 3e3486393d..0000000000 --- a/docs/zh-CN/extending-doris/seatunnel/spark-sink.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -{ - "title": "Seatunnel Connector Spark Doris", - "language": "zh-CN" -} ---- - - - -# SeaTunnel -最新版本的 [Apache SeaTunnel (原 waterdrop )](https://seatunnel.apache.org/zh-CN/) 已经支持 Doris 的连接器, SeaTunnel 可以用过 Spark 引擎和 Flink 引擎同步数据至 Doris 中. 
- -事实上, SeaTunnel 通过 Stream Load 方式同步数据,性能强劲,欢迎大家使用 - -#安装 SeaTunnel -[SeaTunnel 安装链接](https://interestinglab.github.io/seatunnel-docs/#/zh-cn/v2/flink/installation) - -## Spark Sink Doris - -### 插件代码 -Spark Sink Doris 的插件代码在[这里](https://github.com/InterestingLab/seatunnel/tree/dev/seatunnel-connectors/plugin-spark-sink-doris) -### 参数列表 -| 参数名 | 参数类型 | 是否必要 | 默认值 | 引擎类型 | -| --- | --- | --- | --- | --- | -| fenodes | string | yes | - | Spark | -| database | string | yes | - | Spark | -| table | string | yes | - | Spark | -| user | string | yes | - | Spark | -| password | string | yes | - | Spark | -| batch_size | int | yes | 100 | Spark | -| doris.* | string | no | - | Spark | - -`fenodes [string]` - -Doris Fe节点地址:8030 - - -`database [string]` - -写入 Doris 的库名 - -`table [string]` - -写入 Doris 的表名 - -`user [string]` - -Doris 访问用户 - -`password [string]` - -Doris 访问用户密码 - -`batch_size [string]` - -Spark 通过 Stream Load 方式写入,每个批次提交条数 - -`doris. [string]` - -Stream Load 方式写入的 Http 参数优化,在官网参数前加上'Doris.'前缀 - -[更多 Stream Load 参数配置](https://doris.apache.org/master/zh-CN/administrator-guide/load-data/stream-load-manual.html) - -### Examples -Hive 迁移数据至 Doris -``` -env{ - spark.app.name = "hive2doris-template" -} - -spark { - spark.sql.catalogImplementation = "hive" -} - -source { - hive { - preSql = "select * from tmp.test" - result_table_name = "test" - } -} - -transform { -} - - -sink { - -Console { - - } - -Doris { - fenodes="xxxx:8030" - database="tmp" - table="test" - user="root" - password="root" - batch_size=1000 - doris.column_separator="\t" - doris.columns="date_key,date_value,day_in_year,day_in_month" - } -} -``` -启动命令 -``` -sh bin/start-waterdrop-spark.sh --master local[4] --deploy-mode client --config ./config/spark.conf -``` \ No newline at end of file diff --git a/docs/zh-CN/extending-doris/spark-doris-connector.md b/docs/zh-CN/extending-doris/spark-doris-connector.md deleted file mode 100644 index 9773a63e6b..0000000000 --- a/docs/zh-CN/extending-doris/spark-doris-connector.md +++ /dev/null @@ -1,291 +0,0 @@ ---- -{ - "title": "Spark Doris Connector", - "language": "zh-CN" -} ---- - - - -# Spark Doris Connector - -Spark Doris Connector 可以支持通过 Spark 读取 Doris 中存储的数据,也支持通过Spark写入数据到Doris。 - -代码库地址:https://github.com/apache/incubator-doris-spark-connector - -- 支持从`Doris`中读取数据 -- 支持`Spark DataFrame`批量/流式 写入`Doris` -- 可以将`Doris`表映射为`DataFrame`或者`RDD`,推荐使用`DataFrame`。 -- 支持在`Doris`端完成数据过滤,减少数据传输量。 - -## 版本兼容 - -| Connector | Spark | Doris | Java | Scala | -|---------------| ----- | ------ | ---- | ----- | -| 2.3.4-2.11.xx | 2.x | 0.12+ | 8 | 2.11 | -| 3.1.2-2.12.xx | 3.x | 0.12.+ | 8 | 2.12 | - -## 编译与安装 - -准备工作 - -1.修改`custom_env.sh.tpl`文件,重命名为`custom_env.sh` - -2.指定thrift安装目录 - -```bash -##源文件内容 -#export THRIFT_BIN= -#export MVN_BIN= -#export JAVA_HOME= - -##修改如下,MacOS为例 -export THRIFT_BIN=/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift -#export MVN_BIN= -#export JAVA_HOME= - -安装 `thrift` 0.13.0 版本(注意:`Doris` 0.15 和最新的版本基于 `thrift` 0.13.0 构建, 之前的版本依然使用`thrift` 0.9.3 构建) - Windows: - 1.下载:`http://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.exe`(下载目录自己指定) - 2.修改thrift-0.13.0.exe 为 thrift - - MacOS: - 1. 下载:`brew install thrift@0.13.0` - 2. 默认下载地址:/opt/homebrew/Cellar/thrift@0.13.0/0.13.0/bin/thrift - - - 注:MacOS执行 `brew install thrift@0.13.0` 可能会报找不到版本的错误,解决方法如下,在终端执行: - 1. `brew tap-new $USER/local-tap` - 2. `brew extract --version='0.13.0' thrift $USER/local-tap` - 3. 
`brew install thrift@0.13.0` - 参考链接: `https://gist.github.com/tonydeng/02e571f273d6cce4230dc8d5f394493c` - - Linux: - 1.下载源码包:`wget https://archive.apache.org/dist/thrift/0.13.0/thrift-0.13.0.tar.gz` - 2.安装依赖:`yum install -y autoconf automake libtool cmake ncurses-devel openssl-devel lzo-devel zlib-devel gcc gcc-c++` - 3.`tar zxvf thrift-0.13.0.tar.gz` - 4.`cd thrift-0.13.0` - 5.`./configure --without-tests` - 6.`make` - 7.`make install` - 安装完成后查看版本:thrift --version - 注:如果编译过Doris,则不需要安装thrift,可以直接使用 $DORIS_HOME/thirdparty/installed/bin/thrift -``` - -在源码目录下执行: - -```bash -sh build.sh 2.3.4 2.11 ## spark 2.3.4, scala 2.11 -sh build.sh 3.1.2 2.12 ## spark 3.1.2, scala 2.12 - -``` -> 注:如果你是从 tag 检出的源码,则可以直接执行 `sh build.sh --tag`,而无需指定 spark 和 scala 的版本。因为 tag 源码中的版本是固定的。 - -编译成功后,会在 `output/` 目录下生成文件 `doris-spark-2.3.4-2.11-1.0.0-SNAPSHOT.jar`。将此文件复制到 `Spark` 的 `ClassPath` 中即可使用 `Spark-Doris-Connector`。例如,`Local` 模式运行的 `Spark`,将此文件放入 `jars/` 文件夹下。`Yarn`集群模式运行的`Spark`,则将此文件放入预部署包中。 - -## 使用Maven管理 - -``` - - org.apache.doris - spark-doris-connector-3.1_2.12 - - 1.0.1 - -``` - -**注意** - -请根据不同的 Spark 和 Scala 版本替换相应的 Connector 版本。 - -## 使用示例 -### 读取 - -#### SQL - -```sql -CREATE TEMPORARY VIEW spark_doris -USING doris -OPTIONS( - "table.identifier"="$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME", - "fenodes"="$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT", - "user"="$YOUR_DORIS_USERNAME", - "password"="$YOUR_DORIS_PASSWORD" -); - -SELECT * FROM spark_doris; -``` - -#### DataFrame - -```scala -val dorisSparkDF = spark.read.format("doris") - .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME") - .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT") - .option("user", "$YOUR_DORIS_USERNAME") - .option("password", "$YOUR_DORIS_PASSWORD") - .load() - -dorisSparkDF.show(5) -``` - -#### RDD - -```scala -import org.apache.doris.spark._ -val dorisSparkRDD = sc.dorisRDD( - tableIdentifier = Some("$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME"), - cfg = Some(Map( - "doris.fenodes" -> "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT", - "doris.request.auth.user" -> "$YOUR_DORIS_USERNAME", - "doris.request.auth.password" -> "$YOUR_DORIS_PASSWORD" - )) -) - -dorisSparkRDD.collect() -``` - -### 写入 - -#### SQL - -```sql -CREATE TEMPORARY VIEW spark_doris -USING doris -OPTIONS( - "table.identifier"="$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME", - "fenodes"="$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT", - "user"="$YOUR_DORIS_USERNAME", - "password"="$YOUR_DORIS_PASSWORD" -); - -INSERT INTO spark_doris VALUES ("VALUE1","VALUE2",...); -# or -INSERT INTO spark_doris SELECT * FROM YOUR_TABLE -``` - -#### DataFrame(batch/stream) - -```scala -## batch sink -val mockDataDF = List( - (3, "440403001005", "21.cn"), - (1, "4404030013005", "22.cn"), - (33, null, "23.cn") -).toDF("id", "mi_code", "mi_name") -mockDataDF.show(5) - -mockDataDF.write.format("doris") - .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME") - .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT") - .option("user", "$YOUR_DORIS_USERNAME") - .option("password", "$YOUR_DORIS_PASSWORD") - //其它选项 - //指定你要写入的字段 - .option("doris.write.fields","$YOUR_FIELDS_TO_WRITE") - .save() - -## stream sink(StructuredStreaming) -val kafkaSource = spark.readStream - .option("kafka.bootstrap.servers", "$YOUR_KAFKA_SERVERS") - .option("startingOffsets", "latest") - .option("subscribe", "$YOUR_KAFKA_TOPICS") - .format("kafka") - 
.load() -kafkaSource.selectExpr("CAST(key AS STRING)", "CAST(value as STRING)") - .writeStream - .format("doris") - .option("checkpointLocation", "$YOUR_CHECKPOINT_LOCATION") - .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME") - .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT") - .option("user", "$YOUR_DORIS_USERNAME") - .option("password", "$YOUR_DORIS_PASSWORD") - //其它选项 - //指定你要写入的字段 - .option("doris.write.fields","$YOUR_FIELDS_TO_WRITE") - .start() - .awaitTermination() -``` - - - -## 配置 - -### 通用配置项 - -| Key | Default Value | Comment | -| -------------------------------- | ----------------- | ------------------------------------------------------------ | -| doris.fenodes | -- | Doris FE http 地址,支持多个地址,使用逗号分隔 | -| doris.table.identifier | -- | Doris 表名,如:db1.tbl1 | -| doris.request.retries | 3 | 向Doris发送请求的重试次数 | -| doris.request.connect.timeout.ms | 30000 | 向Doris发送请求的连接超时时间 | -| doris.request.read.timeout.ms | 30000 | 向Doris发送请求的读取超时时间 | -| doris.request.query.timeout.s | 3600 | 查询doris的超时时间,默认值为1小时,-1表示无超时限制 | -| doris.request.tablet.size | Integer.MAX_VALUE | 一个RDD Partition对应的Doris Tablet个数。
此数值设置越小,则会生成越多的Partition,从而提升Spark侧的并行度,但同时会对Doris造成更大的压力。 | -| doris.batch.size | 1024 | 一次从BE读取数据的最大行数。增大此数值可减少Spark与Doris之间建立连接的次数,从而减轻网络延迟所带来的额外时间开销。 | -| doris.exec.mem.limit | 2147483648 | 单个查询的内存限制。默认为 2GB,单位为字节 | -| doris.deserialize.arrow.async | false | 是否支持异步转换Arrow格式到spark-doris-connector迭代所需的RowBatch | -| doris.deserialize.queue.size | 64 | 异步转换Arrow格式的内部处理队列,当doris.deserialize.arrow.async为true时生效 | -| doris.write.fields | -- | 指定写入Doris表的字段或者字段顺序,多列之间使用逗号分隔。
默认写入时要按照Doris表字段顺序写入全部字段。 | -| sink.batch.size | 10000 | 单次写BE的最大行数 | -| sink.max-retries | 1 | 写BE失败之后的重试次数 | - -### SQL 和 Dataframe 专有配置 - -| Key | Default Value | Comment | -| ------------------------------- | ------------- | ------------------------------------------------------------ | -| user | -- | 访问Doris的用户名 | -| password | -- | 访问Doris的密码 | -| doris.filter.query.in.max.count | 100 | 谓词下推中,in表达式value列表元素最大数量。超过此数量,则in表达式条件过滤在Spark侧处理。 | - -### RDD 专有配置 - -| Key | Default Value | Comment | -| --------------------------- | ------------- | ------------------------------------------------------------ | -| doris.request.auth.user | -- | 访问Doris的用户名 | -| doris.request.auth.password | -- | 访问Doris的密码 | -| doris.read.field | -- | 读取Doris表的列名列表,多列之间使用逗号分隔 | -| doris.filter.query | -- | 过滤读取数据的表达式,此表达式透传给Doris。Doris使用此表达式完成源端数据过滤。 | - - -## Doris 和 Spark 列类型映射关系 - -| Doris Type | Spark Type | -| ---------- | -------------------------------- | -| NULL_TYPE | DataTypes.NullType | -| BOOLEAN | DataTypes.BooleanType | -| TINYINT | DataTypes.ByteType | -| SMALLINT | DataTypes.ShortType | -| INT | DataTypes.IntegerType | -| BIGINT | DataTypes.LongType | -| FLOAT | DataTypes.FloatType | -| DOUBLE | DataTypes.DoubleType | -| DATE | DataTypes.StringType1 | -| DATETIME | DataTypes.StringType1 | -| BINARY | DataTypes.BinaryType | -| DECIMAL | DecimalType | -| CHAR | DataTypes.StringType | -| LARGEINT | DataTypes.StringType | -| VARCHAR | DataTypes.StringType | -| DECIMALV2 | DecimalType | -| TIME | DataTypes.DoubleType | -| HLL | Unsupported datatype | - -* 注:Connector中,将`DATE`和`DATETIME`映射为`String`。由于`Doris`底层存储引擎处理逻辑,直接使用时间类型时,覆盖的时间范围无法满足需求。所以使用 `String` 类型直接返回对应的时间可读文本。 diff --git a/docs/zh-CN/extending-doris/udf/contribute-udf.md b/docs/zh-CN/extending-doris/udf/contribute-udf.md deleted file mode 100644 index c7b06fe2fe..0000000000 --- a/docs/zh-CN/extending-doris/udf/contribute-udf.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -{ - "title": "贡献 UDF ", - "language": "zh-CN" -} ---- - - - -# 贡献 UDF - -该手册主要讲述了外部用户如何将自己编写的 UDF 函数贡献给 Doris 社区。 - -## 前提条件 - -1. UDF 函数具有通用性 - - 这里的通用性主要指的是:UDF 函数在某些业务场景下,被广泛使用。也就是说 UDF 函数具有复用价值,可被社区内其他用户直接使用。 - - 如果你不确定自己写的 UDF 函数是否具有通用性,可以发邮件到 `dev@doris.apache.org` 或直接创建 ISSUE 发起讨论。 - -2. UDF 已经完成测试,并正常运行在用户的生产环境中 - -## 准备工作 - -1. UDF 的 source code -2. UDF 的使用手册 - -### 源代码 - -在 `contrib/udf/src/` 下创建一个存放 UDF 函数的文件夹,并将源码和 CMAKE 文件存放在此处。待贡献的源代码应该包含: `.h` , `.cpp`, `CMakeFile.txt`。这里以 udf_samples 为例,首先在 `contrib/udf/src/` 路径下创建一个新的文件夹,并存放源码。 - -``` - ├──contrib - │ └── udf - │ ├── CMakeLists.txt - │ └── src - │ └── udf_samples - │ ├── CMakeLists.txt - │ ├── uda_sample.cpp - │ ├── uda_sample.h - │ ├── udf_sample.cpp - │ └── udf_sample.h - -``` - -1. CMakeLists.txt - - 用户的 `CMakeLists.txt` 放在此处后,需要进行少量更改。去掉 `include udf` 和 `udf lib` 即可。去掉的原因是,在 `contrib/udf` 层级的 CMake 文件中,已经声明了。 - -### 使用手册 - -使用手册需要包含:UDF 函数含义说明,适用的场景,函数的语法,如何编译 UDF ,如何在 Doris 集群中使用 UDF, 以及使用示例。 - -1. 使用手册需包含中英文两个版本,并分别存放在 `docs/zh-CN/extending-doris/udf/contrib` 和 `docs/en/extending-doris/udf/contrib` 下。 - - ``` - ├── docs - │   └── zh-CN - │   └──extending-doris - │ └──udf - │ └──contrib - │ ├── udf-simple-manual.md - - ``` - - ``` - ├── docs - │   └── en - │   └──extending-doris - │ └──udf - │ └──contrib - │ ├── udf-simple-manual.md - ``` - -2. 
将两个使用手册的文件,加入中文和英文的 sidebar 中。 - - ``` - vi docs/.vuepress/sidebar/zh-CN.js - { - title: "用户贡献的 UDF", - directoryPath: "contrib/", - children: - [ - "udf-simple-manual", - ], - }, - ``` - - ``` - vi docs/.vuepress/sidebar/en.js - { - title: "Users contribute UDF", - directoryPath: "contrib/", - children: - [ - "udf-simple-manual", - ], - }, - - ``` - -## 贡献 UDF 到社区 - -当你符合前提条件并准备好代码,文档后就可以将 UDF 贡献到 Doris 社区了。在 [Github](https://github.com/apache/incubator-doris) 上面提交 Pull Request (PR) 即可。具体提交方式见:[Pull Request (PR)](https://help.github.com/articles/about-pull-requests/)。 - -最后,当 PR 评审通过并 Merge 后。恭喜你,你的 UDF 已经贡献给 Doris 社区,你可以在 [Doris 官网](http://doris.apache.org/master/zh-CN/) 的扩展功能部分查看到啦~。 diff --git a/docs/zh-CN/extending-doris/udf/java-user-defined-function.md b/docs/zh-CN/extending-doris/udf/java-user-defined-function.md deleted file mode 100644 index 85b85b2c7c..0000000000 --- a/docs/zh-CN/extending-doris/udf/java-user-defined-function.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -{ - "title": "[Experimental] Java UDF", - "language": "zh-CN" -} ---- - - - -# Java UDF - -Java UDF 为用户提供UDF编写的Java接口,以方便用户使用Java语言进行自定义函数的执行。相比于 Native 的 UDF 实现,Java UDF 有如下优势和限制: -1. 优势 - * 兼容性:使用Java UDF可以兼容不同的Doris版本,所以在进行Doris版本升级时,Java UDF不需要进行额外的迁移操作。与此同时,Java UDF同样遵循了和Hive/Spark等引擎同样的编程规范,使得用户可以直接将Hive/Spark的UDF jar包迁移至Doris使用。 - * 安全:Java UDF 执行失败或崩溃仅会导致JVM报错,而不会导致 Doris 进程崩溃。 - * 灵活:Java UDF 中用户通过把第三方依赖打进用户jar包,而不需要额外处理引入的三方库。 - -2. 使用限制 - * 性能:相比于 Native UDF,Java UDF会带来额外的JNI开销,不过通过批式执行的方式,我们已经尽可能的将JNI开销降到最低。 - * 向量化引擎:Java UDF当前只支持向量化引擎。 - -## 编写 UDF 函数 - -本小节主要介绍如何开发一个 Java UDF。在 `samples/doris-demo/java-udf-demo/` 下提供了示例,可供参考。 - -使用Java代码编写UDF,UDF的主入口必须为 `evaluate` 函数。这一点与Hive等其他引擎保持一致。在本示例中,我们编写了 `AddOne` UDF来完成对整型输入进行加一的操作。 -值得一提的是,本例不只是Doris支持的Java UDF,同时还是Hive支持的UDF,也就是说,对于用户来讲,Hive UDF是可以直接迁移至Doris的。 - -## 创建 UDF - -目前暂不支持 UDAF 和 UDTF - -```sql -CREATE FUNCTION -name ([,...]) -[RETURNS] rettype -PROPERTIES (["key"="value"][,...]) -``` -说明: - -1. PROPERTIES中`symbol`表示的是包含UDF类的类名,这个参数是必须设定的。 -2. PROPERTIES中`file`表示的包含用户UDF的jar包,这个参数是必须设定的。 -3. PROPERTIES中`type`表示的 UDF 调用类型,默认为 Native,使用 Java UDF时传 JAVA_UDF。 -4. name: 一个function是要归属于某个DB的,name的形式为`dbName`.`funcName`。当`dbName`没有明确指定的时候,就是使用当前session所在的db作为`dbName`。 - -示例: -```sql -CREATE FUNCTION java_udf_add_one(int) RETURNS int PROPERTIES ( - "file"="file:///path/to/java-udf-demo-jar-with-dependencies.jar", - "symbol"="org.apache.doris.udf.AddOne", - "type"="JAVA_UDF" -); -``` - -## 使用 UDF - -用户使用 UDF 必须拥有对应数据库的 `SELECT` 权限。 - -UDF 的使用与普通的函数方式一致,唯一的区别在于,内置函数的作用域是全局的,而 UDF 的作用域是 DB内部。当链接 session 位于数据内部时,直接使用 UDF 名字会在当前DB内部查找对应的 UDF。否则用户需要显示的指定 UDF 的数据库名字,例如 `dbName`.`funcName`。 - -## 删除 UDF - -当你不再需要 UDF 函数时,你可以通过下述命令来删除一个 UDF 函数, 可以参考 `DROP FUNCTION`。 - -## 示例 -在`samples/doris-demo/java-udf-demo/` 目录中提供了具体示例。具体使用方法见每个目录下的`README.md` - -## 暂不支持的场景 -当前Java UDF仍然处在持续的开发过程中,所以部分功能**尚不完善**。包括: -1. 不支持复杂数据类型(Date,HLL,Bitmap) -2. 尚未统一JVM和Doris的内存管理以及统计信息 diff --git a/docs/zh-CN/extending-doris/udf/native-user-defined-function.md b/docs/zh-CN/extending-doris/udf/native-user-defined-function.md deleted file mode 100644 index fff1ddbd5d..0000000000 --- a/docs/zh-CN/extending-doris/udf/native-user-defined-function.md +++ /dev/null @@ -1,267 +0,0 @@ ---- -{ - "title": "原生UDF", - "language": "zh-CN" -} ---- - - - -# UDF - -UDF 主要适用于,用户需要的分析能力 Doris 并不具备的场景。用户可以自行根据自己的需求,实现自定义的函数,并且通过 UDF 框架注册到 Doris 中,来扩展 Doris 的能力,并解决用户分析需求。 - -UDF 能满足的分析需求分为两种:UDF 和 UDAF。本文中的 UDF 指的是二者的统称。 - -1. 
UDF: 用户自定义函数,这种函数会对单行进行操作,并且输出单行结果。当用户在查询时使用 UDF ,每行数据最终都会出现在结果集中。典型的 UDF 比如字符串操作 concat() 等。 -2. UDAF: 用户自定义的聚合函数,这种函数对多行进行操作,并且输出单行结果。当用户在查询时使用 UDAF,分组后的每组数据最后会计算出一个值并展结果集中。典型的 UDAF 比如集合操作 sum() 等。一般来说 UDAF 都会结合 group by 一起使用。 - -这篇文档主要讲述了,如何编写自定义的 UDF 函数,以及如何在 Doris 中使用它。 - -## 编写 UDF 函数 - -在使用UDF之前,用户需要先在 Doris 的 UDF 框架下,编写自己的UDF函数。在`contrib/udf/src/udf_samples/udf_sample.h|cpp`文件中是一个简单的 UDF Demo。 - -编写一个 UDF 函数需要以下几个步骤。 - -### 编写函数 - -创建对应的头文件、CPP文件,在CPP文件中实现你需要的逻辑。CPP文件中的实现函数格式与UDF的对应关系。 - -用户可以把自己的 source code 统一放在一个文件夹下。这里以 udf_sample 为例,目录结构如下: - -``` -└── udf_samples - ├── uda_sample.cpp - ├── uda_sample.h - ├── udf_sample.cpp - └── udf_sample.h -``` - -#### 非可变参数 - -对于非可变参数的UDF,那么两者之间的对应关系很直接。 -比如`INT MyADD(INT, INT)`的UDF就会对应`IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2)`。 - -1. `AddUdf`可以为任意的名字,只要创建UDF的时候指定即可。 -2. 实现函数中的第一个参数永远是`FunctionContext*`。实现者可以通过这个结构体获得一些查询相关的内容,以及申请一些需要使用的内存。具体使用的接口可以参考`udf/udf.h`中的定义。 -3. 实现函数中从第二个参数开始需要与UDF的参数一一对应,比如`IntVal`对应`INT`类型。这部分的类型都要使用`const`引用。 -4. 返回参数与UDF的参数的类型要相对应。 - -#### 可变参数 - -对于可变参数,可以参见以下例子,UDF`String md5sum(String, ...)`对应的 -实现函数是`StringVal md5sumUdf(FunctionContext* ctx, int num_args, const StringVal* args)` - -1. `md5sumUdf`这个也是可以任意改变的,创建的时候指定即可。 -2. 第一个参数与非可变参数函数一样,传入的是一个`FunctionContext*`。 -3. 可变参数部分由两部分组成,首先会传入一个整数,说明后面还有几个参数。后面传入的是一个可变参数部分的数组。 - -#### 类型对应关系 - -|UDF Type|Argument Type| -|----|---------| -|TinyInt|TinyIntVal| -|SmallInt|SmallIntVal| -|Int|IntVal| -|BigInt|BigIntVal| -|LargeInt|LargeIntVal| -|Float|FloatVal| -|Double|DoubleVal| -|Date|DateTimeVal| -|Datetime|DateTimeVal| -|Char|StringVal| -|Varchar|StringVal| -|Decimal|DecimalVal| - - -## 编译 UDF 函数 - -由于 UDF 实现中依赖了 Doris 的 UDF 框架 , 所以在编译 UDF 函数的时候首先要对 Doris 进行编译,也就是对 UDF 框架进行编译。 - -编译完成后会生成,UDF 框架的静态库文件。之后引入 UDF 框架依赖,并编译 UDF 即可。 - -### 编译Doris - -在 Doris 根目录下执行 `sh build.sh` 就会在 `output/udf/` 生成 UDF 框架的静态库文件 `headers|libs` - -``` -├── output -│   └── udf -│   ├── include -│   │   ├── uda_test_harness.h -│   │   └── udf.h -│   └── lib -│   └── libDorisUdf.a - -``` - -### 编写 UDF 编译文件 - -1. 准备 thirdparty - - `thirdparty` 文件夹主要用于存放用户 UDF 函数依赖的第三方库,包括头文件及静态库。其中必须包含依赖的 Doris UDF 框架中 `udf.h` 和 `libDorisUdf.a` 这两个文件。 - - 这里以 `udf_sample` 为例, 在 用户自己 `udf_samples` 目录用于存放 source code。在同级目录下再创建一个 `thirdparty` 文件夹用于存放静态库。目录结构如下: - - ``` - ├── thirdparty - │ │── include - │ │ └── udf.h - │ └── lib - │ └── libDorisUdf.a - └── udf_samples - - ``` - - `udf.h` 是 UDF 框架头文件。存放路径为 `doris/output/udf/include/udf.h`。 用户需要将 Doris 编译产出中的这个头文件拷贝到自己的 `thirdparty` 的 include 文件夹下。 - - `libDorisUdf.a` 是 UDF 框架的静态库。Doris 编译完成后该文件存放在 `doris/output/udf/lib/libDorisUdf.a`。用户需要将该文件拷贝到自己的 `thirdparty` 的 lib 文件夹下。 - - *注意:UDF 框架的静态库只有完成 Doris 编译后才会生成。 - -2. 
准备编译 UDF 的 CMakeFiles.txt - - CMakeFiles.txt 用于声明 UDF 函数如何进行编译。存放在源码文件夹下,与用户代码平级。这里以 `udf_samples` 为例目录结构如下: - - ``` - ├── thirdparty - └── udf_samples - ├── CMakeLists.txt - ├── uda_sample.cpp - ├── uda_sample.h - ├── udf_sample.cpp - └── udf_sample.h - ``` - - + 需要显示声明引用 `libDorisUdf.a` - + 声明 `udf.h` 头文件位置 - - - 以 udf_sample 为例 - - ``` - # Include udf - include_directories(thirdparty/include) - - # Set all libraries - add_library(udf STATIC IMPORTED) - set_target_properties(udf PROPERTIES IMPORTED_LOCATION thirdparty/lib/libDorisUdf.a) - - # where to put generated libraries - set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") - - # where to put generated binaries - set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/udf_samples") - - add_library(udfsample SHARED udf_sample.cpp) - target_link_libraries(udfsample - udf - -static-libstdc++ - -static-libgcc - ) - - add_library(udasample SHARED uda_sample.cpp) - target_link_libraries(udasample - udf - -static-libstdc++ - -static-libgcc - ) - ``` - - 如果用户的 UDF 函数还依赖了其他的三方库,则需要声明 include,lib,并在 `add_library` 中增加依赖。 - -所有文件准备齐后完整的目录结构如下: - -``` - ├── thirdparty - │ │── include - │ │ └── udf.h - │ └── lib - │ └── libDorisUdf.a - └── udf_samples - ├── CMakeLists.txt - ├── uda_sample.cpp - ├── uda_sample.h - ├── udf_sample.cpp - └── udf_sample.h -``` - -准备好上述文件就可以直接编译 UDF 了 - -### 执行编译 - -在 udf_samples 文件夹下创建一个 build 文件夹,用于存放编译产出。 - -在 build 文件夹下运行命令 `cmake ../` 生成Makefile,并执行 make 就会生成对应动态库。 - -``` -├── thirdparty -├── udf_samples - └── build -``` - -### 编译结果 - -编译完成后的 UDF 动态链接库就生成成功了。在 `build/src/` 下,以 udf_samples 为例,目录结构如下: - -``` - -├── thirdparty -├── udf_samples - └── build - └── src - └── udf_samples - ├── libudasample.so -   └── libudfsample.so - -``` - -## 创建 UDF 函数 - -通过上述的步骤后,你可以得到 UDF 的动态库(也就是编译结果中的 `.so` 文件)。你需要将这个动态库放到一个能够通过 HTTP 协议访问到的位置。 - -然后登录 Doris 系统,在 mysql-client 中通过 `CREATE FUNCTION` 语法创建 UDF 函数。你需要拥有ADMIN权限才能够完成这个操作。这时 Doris 系统内部就会存在刚才创建好的 UDF。 - -``` -CREATE [AGGREGATE] FUNCTION - name ([argtype][,...]) - [RETURNS] rettype - PROPERTIES (["key"="value"][,...]) -``` -说明: - -1. PROPERTIES中`symbol`表示的是,执行入口函数的对应symbol,这个参数是必须设定。你可以通过`nm`命令来获得对应的symbol,比如`nm libudfsample.so | grep AddUdf`获得到的`_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_`就是对应的symbol。 -2. PROPERTIES中`object_file`表示的是从哪里能够下载到对应的动态库,这个参数是必须设定的。 -3. name: 一个function是要归属于某个DB的,name的形式为`dbName`.`funcName`。当`dbName`没有明确指定的时候,就是使用当前session所在的db作为`dbName`。 - -具体使用可以参见 `CREATE FUNCTION` 获取更详细信息。 - -## 使用 UDF - -用户使用 UDF 必须拥有对应数据库的 `SELECT` 权限。 - -UDF 的使用与普通的函数方式一致,唯一的区别在于,内置函数的作用域是全局的,而 UDF 的作用域是 DB内部。当链接 session 位于数据内部时,直接使用 UDF 名字会在当前DB内部查找对应的 UDF。否则用户需要显示的指定 UDF 的数据库名字,例如 `dbName`.`funcName`。 - - -## 删除 UDF函数 - -当你不再需要 UDF 函数时,你可以通过下述命令来删除一个 UDF 函数, 可以参考 `DROP FUNCTION`。 - diff --git a/docs/zh-CN/extending-doris/udf/remote-user-defined-function.md b/docs/zh-CN/extending-doris/udf/remote-user-defined-function.md deleted file mode 100644 index ae22d5936f..0000000000 --- a/docs/zh-CN/extending-doris/udf/remote-user-defined-function.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -{ - "title": "远程UDF", - "language": "zh-CN" -} ---- - - - -# 远程UDF - -Remote UDF Service 支持通过 RPC 的方式访问用户提供的 UDF Service,以实现用户自定义函数的执行。相比于 Native 的 UDF 实现,Remote UDF Service 有如下优势和限制: -1. 优势 - * 跨语言:可以用 Protobuf 支持的各类语言编写 UDF Service。 - * 安全:UDF 执行失败或崩溃,仅会影响 UDF Service 自身,而不会导致 Doris 进程崩溃。 - * 灵活:UDF Service 中可以调用任意其他服务或程序库类,以满足更多样的业务需求。 - -2. 
使用限制 - * 性能:相比于 Native UDF,UDF Service 会带来额外的网络开销,因此性能会远低于 Native UDF。同时,UDF Service 自身的实现也会影响函数的执行效率,用户需要自行处理高并发、线程安全等问题。 - * 单行模式和批处理模式:Doris 原先的的基于行存的查询执行框架会对每一行数据执行一次 UDF RPC 调用,因此执行效率非常差,而在新的向量化执行框架下,会对每一批数据(默认2048行)执行一次 UDF RPC 调用,因此性能有明显提升。实际测试中,基于向量化和批处理方式的 Remote UDF 性能和基于行存的 Native UDF 性能相当,可供参考。 - -## 编写 UDF 函数 - - -本小节主要介绍如何开发一个 Remote RPC service。在 `samples/doris-demo/udf-demo/` 下提供了 Java 版本的示例,可供参考。 - -### 拷贝 proto 文件 - -拷贝 gensrc/proto/function_service.proto 和 gensrc/proto/types.proto 到 Rpc 服务中 - -- function_service.proto - - PFunctionCallRequest - - function_name:函数名称,对应创建函数时指定的symbol - - args:方法传递的参数 - - context:查询上下文信息 - - PFunctionCallResponse - - result:结果 - - status:状态,0代表正常 - - PCheckFunctionRequest - - function:函数相关信息 - - match_type:匹配类型 - - PCheckFunctionResponse - - status:状态,0代表正常 - -### 生成接口 - -通过 protoc 生成代码,具体参数通过 protoc -h 查看 - -### 实现接口 - -共需要实现以下三个方法 -- fnCall:用于编写计算逻辑 -- checkFn:用于创建 UDF 时校验,校验函数名/参数/返回值等是否合法 -- handShake:用于接口探活 - -## 创建 UDF - -目前暂不支持 UDAF 和 UDTF - -```sql -CREATE FUNCTION -name ([,...]) -[RETURNS] rettype -PROPERTIES (["key"="value"][,...]) -``` -说明: - -1. PROPERTIES中`symbol`表示的是 rpc 调用传递的方法名,这个参数是必须设定的。 -2. PROPERTIES中`object_file`表示的 rpc 服务地址,目前支持单个地址和 brpc 兼容格式的集群地址,集群连接方式 参考 [格式说明](https://github.com/apache/incubator-brpc/blob/master/docs/cn/client.md#%E8%BF%9E%E6%8E%A5%E6%9C%8D%E5%8A%A1%E9%9B%86%E7%BE%A4)。 -3. PROPERTIES中`type`表示的 UDF 调用类型,默认为 Native,使用 Rpc UDF时传 RPC。 -4. name: 一个function是要归属于某个DB的,name的形式为`dbName`.`funcName`。当`dbName`没有明确指定的时候,就是使用当前session所在的db作为`dbName`。 - -示例: -```sql -CREATE FUNCTION rpc_add(INT, INT) RETURNS INT PROPERTIES ( - "SYMBOL"="add_int", - "OBJECT_FILE"="127.0.0.1:9090", - "TYPE"="RPC" -); -``` - -## 使用 UDF - -用户使用 UDF 必须拥有对应数据库的 `SELECT` 权限。 - -UDF 的使用与普通的函数方式一致,唯一的区别在于,内置函数的作用域是全局的,而 UDF 的作用域是 DB内部。当链接 session 位于数据内部时,直接使用 UDF 名字会在当前DB内部查找对应的 UDF。否则用户需要显示的指定 UDF 的数据库名字,例如 `dbName`.`funcName`。 - -## 删除 UDF - -当你不再需要 UDF 函数时,你可以通过下述命令来删除一个 UDF 函数, 可以参考 `DROP FUNCTION`。 - -## 示例 -在`samples/doris-demo/` 目录中提供和 cpp/java/python 语言的rpc server 实现示例。具体使用方法见每个目录下的`README.md` diff --git a/new-docs/zh-CN/faq/data-faq.md b/docs/zh-CN/faq/data-faq.md similarity index 100% rename from new-docs/zh-CN/faq/data-faq.md rename to docs/zh-CN/faq/data-faq.md diff --git a/docs/zh-CN/faq/error.md b/docs/zh-CN/faq/error.md deleted file mode 100644 index fea200c496..0000000000 --- a/docs/zh-CN/faq/error.md +++ /dev/null @@ -1,151 +0,0 @@ ---- -{ - "title": "常见报错", - "language": "zh-CN" -} ---- - - - -# 常见报错 - -本文档主要用于记录 Doris 使用过程中的报错,如果您有遇见一些报错,欢迎贡献给我们更新。 - -### E1. 查询报错:Failed to get scan range, no queryable replica found in tablet: xxxx - -这种情况是因为对应的 tablet 没有找到可以查询的副本,通常原因可能是 BE 宕机、副本缺失等。可以先通过 `show tablet tablet_id` 语句,然后执行后面的 `show proc` 语句,查看这个 tablet 对应的副本信息,检查副本是否完整。同时还可以通过 `show proc "/cluster_balance"` 信息来查询集群内副本调度和修复的进度。 - -关于数据副本管理相关的命令,可以参阅 [数据副本管理](../administrator-guide/operation/tablet-repair-and-balance.md)。 - -### E2. FE启动失败,fe.log中一直滚动 "wait catalog to be ready. FE type UNKNOWN" - -这种问题通常有两个原因: - -1. 本次FE启动时获取到的本机IP和上次启动不一致,通常是因为没有正确设置 `priority_network` 而导致 FE 启动时匹配到了错误的 IP 地址。需修改 `priority_network` 后重启 FE。 - -2. 集群内多数 Follower FE 节点未启动。比如有 3 个 Follower,只启动了一个。此时需要将另外至少一个 FE 也启动,FE 可选举组方能选举出 Master 已提供服务。 - -如果以上情况都不能解决,可以按照 Doris 官网文档中的[元数据运维文档](../administrator-guide/operation/metadata-operation.md)进行恢复: - -### E3. 
tablet writer write failed, tablet_id=27306172, txn_id=28573520, err=-235 or -215 or -238 - -这个错误通常发生在数据导入操作中。新版错误码为 -235,老版本错误码可能是 -215。这个错误的含义是,对应tablet的数据版本超过了最大限制(默认500,由 BE 参数 `max_tablet_version_num` 控制),后续写入将被拒绝。比如问题中这个错误,即表示 27306172 这个tablet的数据版本超过了限制。 - -这个错误通常是因为导入的频率过高,大于后台数据的compaction速度,导致版本堆积并最终超过了限制。此时,我们可以先通过show tablet 27306172 语句,然后执行结果中的 show proc 语句,查看tablet各个副本的情况。结果中的 versionCount即表示版本数量。如果发现某个副本的版本数量过多,则需要降低导入频率或停止导入,并观察版本数是否有下降。如果停止导入后,版本数依然没有下降,则需要去对应的BE节点查看be.INFO日志,搜索tablet id以及 compaction关键词,检查compaction是否正常运行。关于compaction调优相关,可以参阅 ApacheDoris 公众号文章:Doris 最佳实践-Compaction调优(3) - --238 错误通常出现在同一批导入数据量过大的情况,从而导致某一个 tablet 的 Segment 文件过多(默认是 200,由 BE 参数 `max_segment_num_per_rowset` 控制)。此时建议减少一批次导入的数据量,或者适当提高 BE 配置参数值来解决。 - -### E4. tablet 110309738 has few replicas: 1, alive backends: [10003] - -这个错误可能发生在查询或者导入操作中。通常意味着对应tablet的副本出现了异常。 - -此时,可以先通过 show backends 命令检查BE节点是否有宕机,如 isAlive 字段为false,或者 LastStartTime 是最近的某个时间(表示最近重启过)。如果BE有宕机,则需要去BE对应的节点,查看be.out日志。如果BE是因为异常原因宕机,通常be.out中会打印异常堆栈,帮助排查问题。如果be.out中没有错误堆栈。则可以通过linux命令dmesg -T 检查是否是因为OOM导致进程被系统kill掉。 - -如果没有BE节点宕机,则需要通过show tablet 110309738 语句,然后执行结果中的 show proc 语句,查看tablet各个副本的情况,进一步排查。 - -### E5. disk xxxxx on backend xxx exceed limit usage - -通常出现在导入、Alter等操作中。这个错误意味着对应BE的对应磁盘的使用量超过了阈值(默认95%)此时可以先通过 show backends 命令,其中MaxDiskUsedPct展示的是对应BE上,使用率最高的那块磁盘的使用率,如果超过95%,则会报这个错误。 - -此时需要前往对应BE节点,查看数据目录下的使用量情况。其中trash目录和snapshot目录可以手动清理以释放空间。如果是data目录占用较大,则需要考虑删除部分数据以释放空间了。具体可以参阅[磁盘空间管理](../administrator-guide/operation/disk-capacity.md)。 - -### E6. invalid cluster id: xxxx - -这个错误可能会在show backends 或 show frontends 命令的结果中出现。通常出现在某个FE或BE节点的错误信息列中。这个错误的含义是,Master FE向这个节点发送心跳信息后,该节点发现心跳信息中携带的 cluster id和本地存储的 cluster id不同,所以拒绝回应心跳。 - -Doris的 Master FE 节点会主动发送心跳给各个FE或BE节点,并且在心跳信息中会携带一个cluster_id。cluster_id是在一个集群初始化时,由Master FE生成的唯一集群标识。当FE或BE第一次收到心跳信息后,则会将cluster_id以文件的形式保存在本地。FE的该文件在元数据目录的image/目录下,BE则在所有数据目录下都有一个cluster_id文件。之后,每次节点收到心跳后,都会用本地cluster_id的内容和心跳中的内容作比对,如果不一致,则拒绝响应心跳。 - -该机制是一个节点认证机制,以防止接收到集群外的节点发送来的错误的心跳信息。 - -如果需要恢复这个错误。首先要先确认所有节点是否都是正确的集群中的节点。之后,对于FE节点,可以尝试修改元数据目录下的 image/VERSION 文件中的 cluster_id 值后重启FE。对于BE节点,则可以删除所有数据目录下的 cluster_id 文件后重启 BE。 - -### E7. 通过 Java 程序调用 stream load 导入数据,在一批次数据量较大时,可能会报错 Broken Pipe - -除了 Broken Pipe 外,还可能出现一些其他的奇怪的错误。 - -这个情况通常出现在开启httpv2后。因为httpv2是使用spring boot实现的http 服务,并且使用tomcat作为默认内置容器。但是tomcat对307转发的处理似乎有些问题,所以后面将内置容器修改为了jetty。此外,在java程序中的 apache http client的版本需要使用4.5.13以后的版本。之前的版本,对转发的处理也存在一些问题。 - -所以这个问题可以有两种解决方式: - -1. 关闭httpv2 - - 在fe.conf中添加 enable_http_server_v2=false后重启FE。但是这样无法再使用新版UI界面,并且之后的一些基于httpv2的新接口也无法使用。(正常的导入查询不受影响)。 - -2. 升级 - - 可以升级到 Doris 0.15 及之后的版本,已修复这个问题。 - -### E8. `Lost connection to MySQL server at 'reading initial communication packet', system error: 0` - -如果使用 MySQL 客户端连接 Doris 时出现如下问题,这通常是因为编译 FE 时使用的 jdk 版本和运行 FE 时使用的 jdk 版本不同导致的。 -注意使用 docker 编译镜像编译时,默认的 JDK 版本是 openjdk 11,可以通过命令切换到 openjdk 8(详见编译文档)。 - -### E9. -214 错误 - -在执行导入、查询等操作时,可能会遇到如下错误: - -``` -failed to initialize storage reader. 
tablet=63416.1050661139.aa4d304e7a7aff9c-f0fa7579928c85a0, res=-214, backend=192.168.100.10 -``` - --214 错误意味着对应 tablet 的数据版本缺失。比如如上错误,表示 tablet 63416 在 192.168.100.10 这个 BE 上的副本的数据版本有缺失。(可能还有其他类似错误码,都可以用如下方式进行排查和修复)。 - -通常情况下,如果你的数据是多副本的,那么系统会自动修复这些有问题的副本。可以通过以下步骤进行排查: - -首先通过 `show tablet 63416` 语句并执行结果中的 `show proc xxx` 语句来查看对应 tablet 的各个副本情况。通常我们需要关心 `Version` 这一列的数据。 - -正常情况下,一个 tablet 的多个副本的 Version 应该是相同的。并且和对应分区的 VisibleVersion 版本相同。 - -你可以通过 `show partitions from tblx` 来查看对应的分区版本(tablet 对应的分区可以在 `show tablet` 语句中获取。) - -同时,你也可以访问 `show proc` 语句中的 CompactionStatus 列中的 URL(在浏览器打开即可)来查看更具体的版本信息,来检查具体丢失的是哪些版本。 - -如果长时间没有自动修复,则需要通过 `show proc "/cluster_balance"` 语句,查看当前系统正在执行的 tablet 修复和调度任务。可能是因为有大量的 tablet 在等待被调度,导致修复时间较长。可以关注 `pending_tablets` 和 `running_tablets` 中的记录。 - -更进一步的,可以通过 `admin repair` 语句来指定优先修复某个表或分区,具体可以参阅 `help admin repair`; - -如果依然无法修复,那么在多副本的情况下,我们使用 `admin set replica status` 命令强制将有问题的副本下线。具体可参阅 `help admin set replica status` 中将副本状态置为 bad 的示例。(置为 bad 后,副本将不会再被访问。并且会后续自动修复。但在操作前,应先确保其他副本是正常的) - -### E10. Not connected to 192.168.100.1:8060 yet, server_id=384 - -在导入或者查询时,我们可能遇到这个错误。如果你去对应的 BE 日志中查看,也可能会找到类似错误。 - -这是一个 RPC 错误,通常有两种可能:1. 对应的 BE 节点宕机。2. rpc 拥塞或其他错误。 - -如果是 BE 节点宕机,则需要查看具体的宕机原因。这里只讨论 rpc 拥塞的问题。 - -一种情况是 OVERCROWDED,即表示 rpc 源端有大量未发送的数据超过了阈值。BE 有两个参数与之相关: - -1. `brpc_socket_max_unwritten_bytes`:默认 1GB,如果未发送数据超过这个值,则会报错。可以适当修改这个值以避免 OVERCROWDED 错误。(但这个治标不治本,本质上还是有拥塞发生)。 -2. `tablet_writer_ignore_eovercrowded`:默认为 false。如果设为true,则 Doris 会忽略导入过程中出现的 OVERCROWDED 错误。这个参数主要为了避免导入失败,以提高导入的稳定性。 - -第二种是 rpc 的包大小超过 max_body_size。如果查询中带有超大 String 类型,或者 bitmap 类型时,可能出现这个问题。可以通过修改以下 BE 参数规避: - -1. `brpc_max_body_size`:默认 3GB. - -### E11. `recoveryTracker should overlap or follow on disk last VLSN of 4,422,880 recoveryFirst= 4,422,882 UNEXPECTED_STATE_FATAL` - -有时重启 FE,会出现如上错误(通常只会出现在多 Follower 的情况下)。并且错误中的两个数值相差2。导致 FE 启动失败。 - -这是 bdbje 的一个 bug,尚未解决。遇到这种情况,只能通过 [元数据运维手册](../administrator-guide/operation/metadata-operation.md) 中的 故障恢复 进行操作来恢复元数据了。 - -### E12.Doris编译安装JDK版本不兼容问题 - -在自己使用 Docker 编译 Doris 的时候,编译完成安装以后启动FE,出现 ```java.lang.Suchmethoderror: java.nio. ByteBuffer. limit (I)Ljava/nio/ByteBuffer;``` 异常信息,这是因为Docker里默认是JDK 11,如果你的安装环境是使用JDK8 ,需要在 Docker 里 JDK 环境切换成 JDK8,具体切换方法参照[编译](https://doris.apache.org/zh-CN/installing/compilation.html) diff --git a/docs/zh-CN/faq/faq.md b/docs/zh-CN/faq/faq.md deleted file mode 100644 index 6b52d196b1..0000000000 --- a/docs/zh-CN/faq/faq.md +++ /dev/null @@ -1,297 +0,0 @@ ---- -{ - "title": "FAQ", - "language": "zh-CN" -} ---- - - - -# FAQ - -本文档主要用于记录 Doris 使用过程中的常见问题。会不定期更新。 - -### Q1. 使用 Stream Load 访问 FE 的公网地址导入数据,被重定向到内网 IP? - -当 stream load 的连接目标为FE的http端口时,FE仅会随机选择一台BE节点做http 307 redirect 操作,因此用户的请求实际是发送给FE指派的某一个BE的。而redirect返回的是BE的ip,也即内网IP。所以如果你是通过FE的公网IP发送的请求,很有可能因为redirect到内网地址而无法连接。 - -通常的做法,一种是确保自己能够访问内网IP地址,或者是给所有BE上层假设一个负载均衡,然后直接将 stream load 请求发送到负载均衡器上,由负载均衡将请求透传到BE节点。 - -### Q2. 通过 DECOMMISSION 下线BE节点时,为什么总会有部分tablet残留? - -在下线过程中,通过 show backends 查看下线节点的 tabletNum ,会观察到 tabletNum 数量在减少,说明数据分片正在从这个节点迁移走。当数量减到0时,系统会自动删除这个节点。但某些情况下,tabletNum 下降到一定数值后就不变化。这通常可能有以下两种原因: - -1. 这些 tablet 属于刚被删除的表、分区或物化视图。而刚被删除的对象会保留在回收站中。而下线逻辑不会处理这些分片。可以通过修改 FE 的配置参数 catalog_trash_expire_second 来修改对象在回收站中驻留的时间。当对象从回收站中被删除后,这些 tablet就会被处理了。 - -2. 这些 tablet 的迁移任务出现了问题。此时需要通过 show proc "/cluster_balance" 来查看具体任务的错误了。 - -对于以上情况,可以先通过 show proc "/statistic" 查看集群是否还有 unhealthy 的分片,如果为0,则可以直接通过 drop backend 语句删除这个 BE 。否则,还需要具体查看不健康分片的副本情况。 - - -### Q3. priorty_network 应该如何设置? 
- -priorty_network 是 FE、BE 都有的配置参数。这个参数主要用于帮助系统选择正确的网卡 IP 作为自己的 IP 。建议任何情况下,都显式的设置这个参数,以防止后续机器增加新网卡导致IP选择不正确的问题。 - -priorty_network 的值是 CIDR 格式表示的。分为两部分,第一部分是点分十进制的 IP 地址,第二部分是一个前缀长度。比如 10.168.1.0/8 会匹配所有 10.xx.xx.xx 的IP地址,而 10.168.1.0/16 会匹配所有 10.168.xx.xx 的 IP 地址。 - -之所以使用 CIDR 格式而不是直接指定一个具体 IP,是为了保证所有节点都可以使用统一的配置值。比如有两个节点:10.168.10.1 和 10.168.10.2,则我们可以使用 10.168.10.0/24 来作为 priorty_network 的值。 - -### Q4. FE的Master、Follower、Observer都是什么? - -首先明确一点,FE 只有两种角色:Follower 和 Observer。而 Master 只是一组 Follower 节点中选择出来的一个 FE。Master 可以看成是一种特殊的 Follower。所以当我们被问及一个集群有多少 FE,都是什么角色时,正确的回答当时应该是所有 FE 节点的个数,以及 Follower 角色的个数和 Observer 角色的个数。 - -所有 Follower 角色的 FE 节点会组成一个可选择组,类似 Poxas 一致性协议里的组概念。组内会选举出一个 Follower 作为 Master。当 Master 挂了,会自动选择新的 Follower 作为 Master。而 Observer 不会参与选举,因此 Observer 也不会称为 Master 。 - -一条元数据日志需要在多数 Follower 节点写入成功,才算成功。比如3个 FE ,2个写入成功才可以。这也是为什么 Follower 角色的个数需要是奇数的原因。 - -Observer 角色和这个单词的含义一样,仅仅作为观察者来同步已经成功写入的元数据日志,并且提供元数据读服务。他不会参与多数写的逻辑。 - -通常情况下,可以部署 1 Follower + 2 Observer 或者 3 Follower + N Observer。前者运维简单,几乎不会出现 Follower 之间的一致性协议导致这种复杂错误情况(百度内部集群大多使用这种方式)。后者可以保证元数据写的高可用,如果是高并发查询场景,可以适当增加 Observer。 - -### Q5. Doris 是否支持修改列名? - -不支持修改列名。 - -Doris支持修改数据库名、表名、分区名、物化视图(Rollup)名称,以及列的类型、注释、默认值等等。但遗憾的是,目前不支持修改列名。 - -因为一些历史原因,目前列名称是直接写入到数据文件中的。Doris在查询时,也是通过类名查找到对应的列的。所以修改列名不仅是简单的元数据修改,还会涉及到数据的重写,是一个非常重的操作。 - -我们不排除后续通过一些兼容手段来支持轻量化的列名修改操作。 - -### Q6. Unique Key模型的表是否支持创建物化视图? - -不支持。 - -Unique Key模型的表是一个对业务比较友好的表,因为其特有的按照主键去重的功能,能够很方便的同步数据频繁变更的业务数据库。因此,很多用户在将数据接入到Doris时,会首先考虑使用Unique Key模型。 - -但遗憾的是,Unique Key模型的表是无法建立物化视图的。原因在于,物化视图的本质,是通过预计算来将数据“预先算好”,这样在查询时直接返回已经计算好的数据,来加速查询。在物化视图中,“预计算”的数据通常是一些聚合指标,比如求和、求count。这时,如果数据发生变更,如udpate或delete,因为预计算的数据已经丢失了明细信息,因此无法同步的进行更新。比如一个求和值5,可能是 1+4,也可能是2+3。因为明细信息的丢失,我们无法区分这个求和值是如何计算出来的,因此也就无法满足更新的需求。 - -### Q7. show backends/frontends 查看到的信息不完整 - -在执行如` show backends/frontends` 等某些语句后,结果中可能会发现有部分列内容不全。比如show backends结果中看不到磁盘容量信息等。 - -通常这个问题会出现在集群有多个FE的情况下,如果用户连接到非Master FE节点执行这些语句,就会看到不完整的信息。这是因为,部分信息仅存在于Master FE节点。比如BE的磁盘使用量信息等。所以只有在直连Master FE后,才能获得完整信息。 - -当然,用户也可以在执行这些语句前,先执行 `set forward_to_master=true;` 这个会话变量设置为true后,后续执行的一些信息查看类语句会自动转发到Master FE获取结果。这样,不论用户连接的是哪个FE,都可以获取到完整结果了。 - -### Q8. 节点新增加了新的磁盘,为什么数据没有均衡到新的磁盘上? - -当前Doris的均衡策略是以节点为单位的。也就是说,是按照节点整体的负载指标(分片数量和总磁盘利用率)来判断集群负载。并且将数据分片从高负载节点迁移到低负载节点。如果每个节点都增加了一块磁盘,则从节点整体角度看,负载并没有改变,所以无法触发均衡逻辑。 - -此外,Doris目前并不支持单个节点内部,各个磁盘间的均衡操作。所以新增磁盘后,不会将数据均衡到新的磁盘。 - -但是,数据在节点之间迁移时,Doris会考虑磁盘的因素。比如一个分片从A节点迁移到B节点,会优先选择B节点中,磁盘空间利用率较低的磁盘。 - -这里我们提供3种方式解决这个问题: - -1. 重建新表 - - 通过create table like 语句建立新表,然后使用 insert into select的方式将数据从老表同步到新表。因为创建新表时,新表的数据分片会分布在新的磁盘中,从而数据也会写入新的磁盘。这种方式适用于数据量较小的情况(几十GB以内)。 - -2. 通过Decommission命令 - - decommission命令用于安全下线一个BE节点。该命令会先将该节点上的数据分片迁移到其他节点,然后在删除该节点。前面说过,在数据迁移时,会优先考虑磁盘利用率低的磁盘,因此该方式可以“强制”让数据迁移到其他节点的磁盘上。当数据迁移完成后,我们在cancel掉这个decommission操作,这样,数据又会重新均衡回这个节点。当我们对所有BE节点都执行一遍上述步骤后,数据将会均匀的分布在所有节点的所有磁盘上。 - - 注意,在执行decommission命令前,先执行以下命令,以避免节点下线完成后被删除。 - - `admin set frontend config("drop_backend_after_decommission" = "false");` - -3. 使用API手动迁移数据 - - Doris提供了[HTTP API](../administrator-guide/http-actions/tablet-migration-action.md),可以手动指定一个磁盘上的数据分片迁移到另一个磁盘上。 - -### Q9. 如何正确阅读 FE/BE 日志? - -很多情况下我们需要通过日志来排查问题。这里说明一下FE/BE日志的格式和查看方式。 - -1. 
FE - - FE日志主要有: - - * fe.log:主日志。包括除fe.out外的所有内容。 - * fe.warn.log:主日志的子集,仅记录 WARN 和 ERROR 级别的日志。 - * fe.out:标准/错误输出的日志(stdout和stderr)。 - * fe.audit.log:审计日志,记录这个FE接收的所有SQL请求。 - - 一条典型的FE日志如下: - - ``` - 2021-09-16 23:13:22,502 INFO (tablet scheduler|43) [BeLoadRebalancer.selectAlternativeTabletsForCluster():85] cluster is balance: default_cluster with medium: HDD. skip - ``` - - * `2021-09-16 23:13:22,502`:日志时间。 - * `INFO:日志级别,默认是INFO`。 - * `(tablet scheduler|43)`:线程名称和线程id。通过线程id,就可以查看这个线程上下文信息,方面排查这个线程发生的事情。 - * `BeLoadRebalancer.selectAlternativeTabletsForCluster():85`:类名、方法名和代码行号。 - * `cluster is balance xxx`:日志内容。 - - 通常情况下我们主要查看fe.log日志。特殊情况下,有些日志可能输出到了fe.out中。 - -2. BE - - BE日志主要有: - - * be.INFO:主日志。这其实是个软连,连接到最新的一个 be.INFO.xxxx上。 - * be.WARNING:主日志的子集,仅记录 WARN 和 FATAL 级别的日志。这其实是个软连,连接到最新的一个 be.WARN.xxxx上。 - * be.out:标准/错误输出的日志(stdout和stderr)。 - - 一条典型的BE日志如下: - - ``` - I0916 23:21:22.038795 28087 task_worker_pool.cpp:1594] finish report TASK. master host: 10.10.10.10, port: 9222 - ``` - - * `I0916 23:21:22.038795`:日志等级和日期时间。大写字母I表示INFO,W表示WARN,F表示FATAL。 - * `28087`:线程id。通过线程id,就可以查看这个线程上下文信息,方面排查这个线程发生的事情。 - * `task_worker_pool.cpp:1594`:代码文件和行号。 - * `finish report TASK xxx`:日志内容。 - - 通常情况下我们主要查看be.INFO日志。特殊情况下,如BE宕机,则需要查看be.out。 - -### Q10. FE/BE 节点挂了应该如何排查原因? - -1. BE - - BE进程是 C/C++ 进程,可能会因为一些程序Bug(内存越界,非法地址访问等)或 Out Of Memory(OOM)导致进程挂掉。此时我们可以通过以下几个步骤查看错误原因: - - 1. 查看be.out - - BE进程实现了在程序因异常情况退出时,会打印当前的错误堆栈到be.out里(注意是be.out,不是be.INFO或be.WARNING)。通过错误堆栈,通常能够大致获悉程序出错的位置。 - - 注意,如果be.out中出现错误堆栈,通常情况下是因为程序bug,普通用户可能无法自行解决,欢迎前往微信群、github discussion 或dev邮件组寻求帮助,并贴出对应的错误堆栈,以便快速排查问题。 - - 2. dmesg - - 如果be.out没有堆栈信息,则大概率是因为OOM被系统强制kill掉了。此时可以通过dmesg -T 这个命令查看linux系统日志,如果最后出现 Memory cgroup out of memory: Kill process 7187 (palo_be) score 1007 or sacrifice child 类似的日志,则说明是OOM导致的。 - - 内存问题可能有多方面原因,如大查询、导入、compaction等。Doris也在不断优化内存使用。欢迎前往微信群、github discussion 或dev邮件组寻求帮助。 - - 3. 查看be.INFO中是否有F开头的日志。 - - F开头的的日志是 Fatal 日志。如 F0916 ,表示9月16号的Fatal日志。Fatal日志通常表示程序断言错误,断言错误会直接导致进程退出(说明程序出现了Bug)。欢迎前往微信群、github discussion 或dev邮件组寻求帮助。 - - 4. Minidump - - Mindump 是 Doris 0.15 版本之后加入的功能,具体可参阅[文档](../developer-guide/minidump.md)。 - -2. FE - - FE 是 java 进程,健壮程度要由于 C/C++ 程序。通常FE 挂掉的原因可能是 OOM(Out-of-Memory)或者是元数据写入失败。这些错误通常在 fe.log 或者 fe.out 中有错误堆栈。需要根据错误堆栈信息进一步排查。 - -### Q11. 关于数据目录SSD和HDD的配置。 - -Doris支持一个BE节点配置多个存储路径。通常情况下,每块盘配置一个存储路径即可。同时,Doris支持指定路径的存储介质属性,如SSD或HDD。SSD代表高速存储设备,HDD代表低速存储设备。 - -通过指定路径的存储介质属性,我们可以利用Doris的冷热数据分区存储功能,在分区级别将热数据存储在SSD中,而冷数据会自动转移到HDD中。 - -需要注意的是,Doris并不会自动感知存储路径所在磁盘的实际存储介质类型。这个类型需要用户在路径配置中显式的表示。比如路径 "/path/to/data1.SSD" 即表示这个路径是SSD存储介质。而 "data1.SSD" 就是实际的目录名称。Doris是根据目录名称后面的 ".SSD" 后缀来确定存储介质类型的,而不是实际的存储介质类型。也就是说,用户可以指定任意路径为SSD存储介质,而Doris仅识别目录后缀,不会去判断存储介质是否匹配。如果不写后缀,则默认为HDD。 - -换句话说,".HDD" 和 ".SSD" 只是用于标识存储目录“相对”的“低速”和“高速”之分,而并不是标识实际的存储介质类型。所以如果BE节点上的存储路径没有介质区别,则无需填写后缀。 - -### Q12. Unique Key 模型查询结果不一致 - -某些情况下,当用户使用相同的 SQL 查询一个 Unique Key 模型的表时,可能会出现多次查询结果不一致的现象。并且查询结果总在 2-3 种之间变化。 - -这可能是因为,在同一批导入数据中,出现了 key 相同但 value 不同的数据,这会导致,不同副本间,因数据覆盖的先后顺序不确定而产生的结果不一致的问题。 - -比如表定义为 k1, v1。一批次导入数据如下: - -``` -1, "abc" -1, "def" -``` - -那么可能副本1 的结果是 `1, "abc"`,而副本2 的结果是 `1, "def"`。从而导致查询结果不一致。 - -为了确保不同副本之间的数据先后顺序唯一,可以参考 [Sequence Column](../administrator-guide/load-data/sequence-column-manual.md) 功能。 - -### Q13. 
多个FE,在使用Nginx实现web UI负载均衡时,无法登录 - -Doris 可以部署多个FE,在访问Web UI的时候,如果使用Nginx进行负载均衡,因为Session问题会出现不停的提示要重新登录,这个问题其实是Session共享的问题,Nginx提供了集中Session共享的解决方案,这里我们使用的是nginx中的ip_hash技术,ip_hash能够将某个ip的请求定向到同一台后端,这样一来这个ip下的某个客户端和某个后端就能建立起稳固的session,ip_hash是在upstream配置中定义的: - -``` -upstream doris.com { - server 172.22.197.238:8030 weight=3; - server 172.22.197.239:8030 weight=4; - server 172.22.197.240:8030 weight=4; - ip_hash; -} -``` -完整的Nginx示例配置如下: - -``` -user nginx; -worker_processes auto; -error_log /var/log/nginx/error.log; -pid /run/nginx.pid; - -# Load dynamic modules. See /usr/share/doc/nginx/README.dynamic. -include /usr/share/nginx/modules/*.conf; - -events { - worker_connections 1024; -} - -http { - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent" "$http_x_forwarded_for"'; - - access_log /var/log/nginx/access.log main; - - sendfile on; - tcp_nopush on; - tcp_nodelay on; - keepalive_timeout 65; - types_hash_max_size 2048; - - include /etc/nginx/mime.types; - default_type application/octet-stream; - - # Load modular configuration files from the /etc/nginx/conf.d directory. - # See http://nginx.org/en/docs/ngx_core_module.html#include - # for more information. - include /etc/nginx/conf.d/*.conf; - #include /etc/nginx/custom/*.conf; - upstream doris.com { - server 172.22.197.238:8030 weight=3; - server 172.22.197.239:8030 weight=4; - server 172.22.197.240:8030 weight=4; - ip_hash; - } - - server { - listen 80; - server_name gaia-pro-bigdata-fe02; - if ($request_uri ~ _load) { - return 307 http://$host$request_uri ; - } - - location / { - proxy_pass http://doris.com; - proxy_redirect default; - } - error_page 500 502 503 504 /50x.html; - location = /50x.html { - root html; - } - } - } -``` diff --git a/new-docs/zh-CN/faq/install-faq.md b/docs/zh-CN/faq/install-faq.md similarity index 100% rename from new-docs/zh-CN/faq/install-faq.md rename to docs/zh-CN/faq/install-faq.md diff --git a/new-docs/zh-CN/faq/sql-faq.md b/docs/zh-CN/faq/sql-faq.md similarity index 100% rename from new-docs/zh-CN/faq/sql-faq.md rename to docs/zh-CN/faq/sql-faq.md diff --git a/new-docs/zh-CN/get-starting/get-starting.md b/docs/zh-CN/get-starting/get-starting.md similarity index 99% rename from new-docs/zh-CN/get-starting/get-starting.md rename to docs/zh-CN/get-starting/get-starting.md index 62c0a23cce..36a9503b19 100644 --- a/new-docs/zh-CN/get-starting/get-starting.md +++ b/docs/zh-CN/get-starting/get-starting.md @@ -1,6 +1,6 @@ --- { - "title": "Get-Starting", + "title": "快速开始", "language": "zh-CN" } @@ -25,7 +25,7 @@ specific language governing permissions and limitations under the License. 
--> -# Apache Doris 快速入门 +# 快速开始 ## 环境准备 diff --git a/docs/zh-CN/getting-started/advance-usage.md b/docs/zh-CN/getting-started/advance-usage.md deleted file mode 100644 index c7395e64b3..0000000000 --- a/docs/zh-CN/getting-started/advance-usage.md +++ /dev/null @@ -1,280 +0,0 @@ ---- -{ - "title": "高级使用指南", - "language": "zh-CN" -} ---- - - - -# 高级使用指南 - -这里我们介绍 Doris 的一些高级特性。 - -## 1 表结构变更 - -使用 ALTER TABLE 命令可以修改表的 Schema,包括如下修改: - -* 增加列 -* 删除列 -* 修改列类型 -* 改变列顺序 - -以下举例说明。 - -原表 table1 的 Schema 如下: - -``` -+----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-------+---------+-------+ -| siteid | int(11) | No | true | 10 | | -| citycode | smallint(6) | No | true | N/A | | -| username | varchar(32) | No | true | | | -| pv | bigint(20) | No | false | 0 | SUM | -+----------+-------------+------+-------+---------+-------+ -``` - -我们新增一列 uv,类型为 BIGINT,聚合类型为 SUM,默认值为 0: - -`ALTER TABLE table1 ADD COLUMN uv BIGINT SUM DEFAULT '0' after pv;` - -提交成功后,可以通过以下命令查看作业进度: - -`SHOW ALTER TABLE COLUMN;` - -当作业状态为 FINISHED,则表示作业完成。新的 Schema 已生效。 - -ALTER TABLE 完成之后, 可以通过 `DESC TABLE` 查看最新的 Schema。 - -``` -mysql> DESC table1; -+----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-------+---------+-------+ -| siteid | int(11) | No | true | 10 | | -| citycode | smallint(6) | No | true | N/A | | -| username | varchar(32) | No | true | | | -| pv | bigint(20) | No | false | 0 | SUM | -| uv | bigint(20) | No | false | 0 | SUM | -+----------+-------------+------+-------+---------+-------+ -5 rows in set (0.00 sec) -``` - -可以使用以下命令取消当前正在执行的作业: - -`CANCEL ALTER TABLE COLUMN FROM table1` - -更多帮助,可以参阅 `HELP ALTER TABLE`。 - -## 2 Rollup - -Rollup 可以理解为 Table 的一个物化索引结构。**物化** 是因为其数据在物理上独立存储,而 **索引** 的意思是,Rollup可以调整列顺序以增加前缀索引的命中率,也可以减少key列以增加数据的聚合度。 - -以下举例说明。 - -原表table1的Schema如下: - -``` -+----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-------+---------+-------+ -| siteid | int(11) | No | true | 10 | | -| citycode | smallint(6) | No | true | N/A | | -| username | varchar(32) | No | true | | | -| pv | bigint(20) | No | false | 0 | SUM | -| uv | bigint(20) | No | false | 0 | SUM | -+----------+-------------+------+-------+---------+-------+ -``` - -对于 table1 明细数据是 siteid, citycode, username 三者构成一组 key,从而对 pv 字段进行聚合;如果业务方经常有看城市 pv 总量的需求,可以建立一个只有 citycode, pv 的rollup。 - -`ALTER TABLE table1 ADD ROLLUP rollup_city(citycode, pv);` - -提交成功后,可以通过以下命令查看作业进度: - -`SHOW ALTER TABLE ROLLUP;` - -当作业状态为 FINISHED,则表示作业完成。 - -Rollup 建立完成之后可以使用 `DESC table1 ALL` 查看表的 Rollup 信息。 - -``` -mysql> desc table1 all; -+-------------+----------+-------------+------+-------+--------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+-------------+----------+-------------+------+-------+---------+-------+ -| table1 | siteid | int(11) | No | true | 10 | | -| | citycode | smallint(6) | No | true | N/A | | -| | username | varchar(32) | No | true | | | -| | pv | bigint(20) | No | false | 0 | SUM | -| | uv | bigint(20) | No | false | 0 | SUM | -| | | | | | | | -| rollup_city | citycode | smallint(6) | No | true | N/A | | -| | pv | bigint(20) | No | false | 0 | SUM | -+-------------+----------+-------------+------+-------+---------+-------+ -8 rows in set (0.01 sec) -``` - -可以使用以下命令取消当前正在执行的作业: - -`CANCEL ALTER TABLE ROLLUP FROM table1;` - -Rollup 建立之后,查询不需要指定 
Rollup 进行查询。还是指定原有表进行查询即可。程序会自动判断是否应该使用 Rollup。是否命中 Rollup可以通过 `EXPLAIN your_sql;` 命令进行查看。 - -更多帮助,可以参阅 `HELP ALTER TABLE`。 - -## 2 数据表的查询 - -### 2.1 内存限制 - -为了防止用户的一个查询可能因为消耗内存过大。查询进行了内存控制,一个查询任务,在单个 BE 节点上默认使用不超过 2GB 内存。 - -用户在使用时,如果发现报 `Memory limit exceeded` 错误,一般是超过内存限制了。 - -遇到内存超限时,用户应该尽量通过优化自己的 sql 语句来解决。 - -如果确切发现 2GB 内存不能满足,可以手动设置内存参数。 - -显示查询内存限制: - -``` -mysql> SHOW VARIABLES LIKE "%mem_limit%"; -+---------------+------------+ -| Variable_name | Value | -+---------------+------------+ -| exec_mem_limit| 2147483648 | -+---------------+------------+ -1 row in set (0.00 sec) -``` - -`exec_mem_limit` 的单位是 byte,可以通过 `SET` 命令改变 `exec_mem_limit` 的值。如改为 8GB。 - -`SET exec_mem_limit = 8589934592;` - -``` -mysql> SHOW VARIABLES LIKE "%mem_limit%"; -+---------------+------------+ -| Variable_name | Value | -+---------------+------------+ -| exec_mem_limit| 8589934592 | -+---------------+------------+ -1 row in set (0.00 sec) -``` - -> * 以上该修改为 session 级别,仅在当前连接 session 内有效。断开重连则会变回默认值。 -> * 如果需要修改全局变量,可以这样设置:`SET GLOBAL exec_mem_limit = 8589934592;`。设置完成后,断开 session 重新登录,参数将永久生效。 - -### 2.2 查询超时 - -当前默认查询时间设置为最长为 300 秒,如果一个查询在 300 秒内没有完成,则查询会被 Doris 系统 cancel 掉。用户可以通过这个参数来定制自己应用的超时时间,实现类似 wait(timeout) 的阻塞方式。 - -查看当前超时设置: - -``` -mysql> SHOW VARIABLES LIKE "%query_timeout%"; -+---------------+-------+ -| Variable_name | Value | -+---------------+-------+ -| QUERY_TIMEOUT | 300 | -+---------------+-------+ -1 row in set (0.00 sec) -``` - -修改超时时间到1分钟: - -`SET query_timeout = 60;` - -> * 当前超时的检查间隔为 5 秒,所以小于 5 秒的超时不会太准确。 -> * 以上修改同样为 session 级别。可以通过 `SET GLOBAL` 修改全局有效。 - -### 2.3 Broadcast/Shuffle Join - -系统提供了两种 Join 的实现方式,broadcast join 和 shuffle join(partitioned Join)。 - -broadcast join 是指将小表进行条件过滤后,将其广播到大表所在的各个节点上,形成一个内存 Hash 表,然后流式读出大表的数据进行 Hash Join。 - -shuffle join 是指将小表和大表都按照 Join 的 key 进行 Hash,然后进行分布式的 Join。 - -当小表的数据量较小时,broadcast join 拥有更好的性能。反之,则 shuffle join 拥有更好的性能。 - -系统会自动尝试进行 Broadcast Join,也可以显式指定每个 join 算子的实现方式。系统提供了可配置的参数`auto_broadcast_join_threshold`,指定使用 broadcast join 时,hash table 使用的内存占整体执行内存比例的上限,取值范围为 `0` 到 `1`,默认值为`0.8`。当系统计算 hash table 使用的内存会超过此限制时,会自动转换为使用 shuffle join。 - -当`auto_broadcast_join_threshold`被设置为小于等于`0`时,所有的 join 都将使用 shuffle join。 - -自动选择join方式(默认): - -``` -mysql> select sum(table1.pv) from table1 join table2 where table1.siteid = 2; -+--------------------+ -| sum(`table1`.`pv`) | -+--------------------+ -| 10 | -+--------------------+ -1 row in set (0.20 sec) -``` - -使用 Broadcast Join(显式指定): - -``` -mysql> select sum(table1.pv) from table1 join [broadcast] table2 where table1.siteid = 2; -+--------------------+ -| sum(`table1`.`pv`) | -+--------------------+ -| 10 | -+--------------------+ -1 row in set (0.20 sec) -``` - -使用 Shuffle Join: - -``` -mysql> select sum(table1.pv) from table1 join [shuffle] table2 where table1.siteid = 2; -+--------------------+ -| sum(`table1`.`pv`) | -+--------------------+ -| 10 | -+--------------------+ -1 row in set (0.15 sec) -``` - -### 2.4 查询重试和高可用 - -当部署多个 FE 节点时,用户可以在多个 FE 之上部署负载均衡层来实现 Doris 的高可用。 - -以下提供一些高可用的方案: - -**第一种** - -自己在应用层代码进行重试和负载均衡。比如发现一个连接挂掉,就自动在其他连接上进行重试。应用层代码重试需要应用自己配置多个doris前端节点地址。 - -**第二种** - -如果使用 mysql jdbc connector 来连接Doris,可以使用 jdbc 的自动重试机制: - -``` -jdbc:mysql://[host1][:port1],[host2][:port2][,[host3][:port3]]...[/[database]][?propertyName1=propertyValue1[&propertyName2=propertyValue2]...] 
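-# A hypothetical example with three FE nodes (replace the hosts with your own; 9030 is the FE query_port from fe.conf):
-# jdbc:mysql://fe1_host:9030,fe2_host:9030,fe3_host:9030/example_db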
-``` - -**第三种** - -应用可以连接到和应用部署到同一机器上的 MySQL Proxy,通过配置 MySQL Proxy 的 Failover 和 Load Balance 功能来达到目的。 - -`http://dev.mysql.com/doc/refman/5.6/en/mysql-proxy-using.html` \ No newline at end of file diff --git a/docs/zh-CN/getting-started/basic-usage.md b/docs/zh-CN/getting-started/basic-usage.md deleted file mode 100644 index 507f2d3527..0000000000 --- a/docs/zh-CN/getting-started/basic-usage.md +++ /dev/null @@ -1,381 +0,0 @@ ---- -{ - "title": "基础使用指南", - "language": "zh-CN" -} ---- - - - -# 基础使用指南 - -Doris 采用 MySQL 协议进行通信,用户可通过 MySQL client 或者 MySQL JDBC连接到 Doris 集群。选择 MySQL client 版本时建议采用5.1 之后的版本,因为 5.1 之前不能支持长度超过 16 个字符的用户名。本文以 MySQL client 为例,通过一个完整的流程向用户展示 Doris 的基本使用方法。 - -## 1 创建用户 - -### 1.1 Root 用户登录与密码修改 - -Doris 内置 root 和 admin 用户,密码默认都为空。启动完 Doris 程序之后,可以通过 root 或 admin 用户连接到 Doris 集群。 -使用下面命令即可登录 Doris: - -``` -mysql -h FE_HOST -P9030 -uroot -``` - -> `fe_host` 是任一 FE 节点的 ip 地址。`9030` 是 fe.conf 中的 query_port 配置。 - -登陆后,可以通过以下命令修改 root 密码 - -``` -SET PASSWORD FOR 'root' = PASSWORD('your_password'); -``` - -### 1.3 创建新用户 - -通过下面的命令创建一个普通用户。 - -``` -CREATE USER 'test' IDENTIFIED BY 'test_passwd'; -``` - -后续登录时就可以通过下列连接命令登录。 - -``` -mysql -h FE_HOST -P9030 -utest -ptest_passwd -``` - -> 新创建的普通用户默认没有任何权限。权限授予可以参考后面的权限授予。 - -## 2 数据表的创建与数据导入 - -### 2.1 创建数据库 - -初始可以通过 root 或 admin 用户创建数据库: - -`CREATE DATABASE example_db;` - -> 所有命令都可以使用 `HELP command;` 查看到详细的语法帮助。如:`HELP CREATE DATABASE;` - -> 如果不清楚命令的全名,可以使用 "help 命令某一字段" 进行模糊查询。如键入 `HELP CREATE`,可以匹配到 `CREATE DATABASE`, `CREATE TABLE`, `CREATE USER` 等命令。 - -数据库创建完成之后,可以通过 `SHOW DATABASES;` 查看数据库信息。 - -``` -MySQL> SHOW DATABASES; -+--------------------+ -| Database | -+--------------------+ -| example_db | -| information_schema | -+--------------------+ -2 rows in set (0.00 sec) -``` - -information_schema是为了兼容MySQL协议而存在,实际中信息可能不是很准确,所以关于具体数据库的信息建议通过直接查询相应数据库而获得。 - -### 2.2 账户授权 - -example_db 创建完成之后,可以通过 root/admin 账户将 example_db 读写权限授权给普通账户,如 test。授权之后采用 test 账户登录就可以操作 example_db 数据库了。 - -`GRANT ALL ON example_db TO test;` - -### 2.3 建表 - -使用 `CREATE TABLE` 命令建立一个表(Table)。更多详细参数可以查看: - -`HELP CREATE TABLE;` - -首先切换数据库: - -`USE example_db;` - -Doris支持支持单分区和复合分区两种建表方式。 - -在复合分区中: - -* 第一级称为 Partition,即分区。用户可以指定某一维度列作为分区列(当前只支持整型和时间类型的列),并指定每个分区的取值范围。 - -* 第二级称为 Distribution,即分桶。用户可以指定一个或多个维度列以及桶数对数据进行 HASH 分布。 - -以下场景推荐使用复合分区 - -* 有时间维度或类似带有有序值的维度,可以以这类维度列作为分区列。分区粒度可以根据导入频次、分区数据量等进行评估。 -* 历史数据删除需求:如有删除历史数据的需求(比如仅保留最近N 天的数据)。使用复合分区,可以通过删除历史分区来达到目的。也可以通过在指定分区内发送 DELETE 语句进行数据删除。 -* 解决数据倾斜问题:每个分区可以单独指定分桶数量。如按天分区,当每天的数据量差异很大时,可以通过指定分区的分桶数,合理划分不同分区的数据,分桶列建议选择区分度大的列。 - -用户也可以不使用复合分区,即使用单分区。则数据只做 HASH 分布。 - -下面以聚合模型为例,分别演示两种分区的建表语句。 - -#### 单分区 - -建立一个名字为 table1 的逻辑表。分桶列为 siteid,桶数为 10。 - -这个表的 schema 如下: - -* siteid:类型是 INT(4 字节), 默认值为 10 字节。 -* citycode:类型是 SMALLINT(2 字节)。 -* username:类型是 VARCHAR, 最大长度为 32 字节, 默认值为空字符串。 -* pv:类型是 BIGINT(8 字节), 默认值是 0; 这是一个指标列, Doris 内部会对指标列做聚合操作, 这个列的聚合方法是求和(SUM)。 - -建表语句如下: -``` -CREATE TABLE table1 -( - siteid INT DEFAULT '10', - citycode SMALLINT, - username VARCHAR(32) DEFAULT '', - pv BIGINT SUM DEFAULT '0' -) -AGGREGATE KEY(siteid, citycode, username) -DISTRIBUTED BY HASH(siteid) BUCKETS 10 -PROPERTIES("replication_num" = "1"); -``` - -#### 复合分区 - -建立一个名字为 table2 的逻辑表。 - -这个表的 schema 如下: - -* event_day:类型是 DATE,无默认值。 -* siteid:类型是 INT(4 字节), 默认值为 10 字节。 -* citycode:类型是 SMALLINT(2 字节)。 -* username:类型是 VARCHAR, 最大长度为 32 字节, 默认值为空字符串。 -* pv:类型是 BIGINT(8 字节), 默认值是 0 字节; 这是一个指标列, Doris 内部会对指标列做聚合操作, 这个列的聚合方法是求和(SUM)。 - -我们使用 event_day 列作为分区列,建立 3 个分区: p201706, p201707, p201708 - -* p201706:范围为 [最小值, 
2017-07-01) -* p201707:范围为 [2017-07-01, 2017-08-01) -* p201708:范围为 [2017-08-01, 2017-09-01) - -> 注意区间为左闭右开。 - -每个分区使用 siteid 进行哈希分桶,桶数为 10。 - -建表语句如下: -``` -CREATE TABLE table2 -( - event_day DATE, - siteid INT DEFAULT '10', - citycode SMALLINT, - username VARCHAR(32) DEFAULT '', - pv BIGINT SUM DEFAULT '0' -) -AGGREGATE KEY(event_day, siteid, citycode, username) -PARTITION BY RANGE(event_day) -( - PARTITION p201706 VALUES LESS THAN ('2017-07-01'), - PARTITION p201707 VALUES LESS THAN ('2017-08-01'), - PARTITION p201708 VALUES LESS THAN ('2017-09-01') -) -DISTRIBUTED BY HASH(siteid) BUCKETS 10 -PROPERTIES("replication_num" = "1"); -``` - -表建完之后,可以查看 example_db 中表的信息: - -``` -MySQL> SHOW TABLES; -+----------------------+ -| Tables_in_example_db | -+----------------------+ -| table1 | -| table2 | -+----------------------+ -2 rows in set (0.01 sec) - -MySQL> DESC table1; -+----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------+-------------+------+-------+---------+-------+ -| siteid | int(11) | Yes | true | 10 | | -| citycode | smallint(6) | Yes | true | N/A | | -| username | varchar(32) | Yes | true | | | -| pv | bigint(20) | Yes | false | 0 | SUM | -+----------+-------------+------+-------+---------+-------+ -4 rows in set (0.00 sec) - -MySQL> DESC table2; -+-----------+-------------+------+-------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+-----------+-------------+------+-------+---------+-------+ -| event_day | date | Yes | true | N/A | | -| siteid | int(11) | Yes | true | 10 | | -| citycode | smallint(6) | Yes | true | N/A | | -| username | varchar(32) | Yes | true | | | -| pv | bigint(20) | Yes | false | 0 | SUM | -+-----------+-------------+------+-------+---------+-------+ -5 rows in set (0.00 sec) -``` - -> 注意事项: -> -> 1. 上述表通过设置 replication_num 建的都是单副本的表,Doris 建议用户采用默认的 3 副本设置,以保证高可用。 -> 2. 可以对复合分区表动态的增删分区。详见 `HELP ALTER TABLE` 中 Partition 相关部分。 -> 3. 数据导入可以导入指定的 Partition。详见 `HELP LOAD`。 -> 4. 可以动态修改表的 Schema。 -> 5. 可以对 Table 增加上卷表(Rollup)以提高查询性能,这部分可以参见高级使用指南关于 Rollup 的描述。 -> 6. 表的列的 Null 属性默认为 true,会对查询性能有一定的影响。 - -### 2.4 导入数据 - -Doris 支持多种数据导入方式。具体可以参阅数据导入文档。这里我们使用流式导入和 Broker 导入做示例。 - -#### 流式导入 - -流式导入通过 HTTP 协议向 Doris 传输数据,可以不依赖其他系统或组件直接导入本地数据。详细语法帮助可以参阅 `HELP STREAM LOAD;`。 - -示例1:以 "table1_20170707" 为 Label,使用本地文件 table1_data 导入 table1 表。 - -``` -curl --location-trusted -u test:test_passwd -H "label:table1_20170707" -H "column_separator:," -T table1_data http://FE_HOST:8030/api/example_db/table1/_stream_load -``` - -> 1. FE_HOST 是任一 FE 所在节点 IP,8030 为 fe.conf 中的 http_port。 -> 2. 可以使用任一 BE 的 IP,以及 be.conf 中的 webserver_port 进行导入。如:`BE_HOST:8040` - -本地文件 `table1_data` 以 `,` 作为数据之间的分隔,具体内容如下: - -``` -1,1,jim,2 -2,1,grace,2 -3,2,tom,2 -4,3,bush,3 -5,3,helen,3 -``` - -示例2: 以 "table2_20170707" 为 Label,使用本地文件 table2_data 导入 table2 表。 - -``` -curl --location-trusted -u test:test -H "label:table2_20170707" -H "column_separator:|" -T table2_data http://127.0.0.1:8030/api/example_db/table2/_stream_load -``` - -本地文件 `table2_data` 以 `|` 作为数据之间的分隔,具体内容如下: - -``` -2017-07-03|1|1|jim|2 -2017-07-05|2|1|grace|2 -2017-07-12|3|2|tom|2 -2017-07-15|4|3|bush|3 -2017-07-12|5|3|helen|3 -``` - -> 注意事项: -> -> 1. 采用流式导入建议文件大小限制在 10GB 以内,过大的文件会导致失败重试代价变大。 -> 2. 每一批导入数据都需要取一个 Label,Label 最好是一个和一批数据有关的字符串,方便阅读和管理。Doris 基于 Label 保证在一个Database 内,同一批数据只可导入成功一次。失败任务的 Label 可以重用。 -> 3. 
流式导入是同步命令。命令返回成功则表示数据已经导入,返回失败表示这批数据没有导入。 - -#### Broker 导入 - -Broker 导入通过部署的 Broker 进程,读取外部存储上的数据进行导入。更多帮助请参阅 `HELP BROKER LOAD;` - -示例:以 "table1_20170708" 为 Label,将 HDFS 上的文件导入 table1 表 - -``` -LOAD LABEL table1_20170708 -( - DATA INFILE("hdfs://your.namenode.host:port/dir/table1_data") - INTO TABLE table1 -) -WITH BROKER hdfs -( - "username"="hdfs_user", - "password"="hdfs_password" -) -PROPERTIES -( - "timeout"="3600", - "max_filter_ratio"="0.1" -); -``` - -Broker 导入是异步命令。以上命令执行成功只表示提交任务成功。导入是否成功需要通过 `SHOW LOAD;` 查看。如: - -`SHOW LOAD WHERE LABEL = "table1_20170708";` - -返回结果中,`State` 字段为 FINISHED 则表示导入成功。 - -关于 `SHOW LOAD` 的更多说明,可以参阅 `HELP SHOW LOAD;` - -异步的导入任务在结束前可以取消: - -`CANCEL LOAD WHERE LABEL = "table1_20170708";` - -## 3 数据的查询 - -### 3.1 简单查询 - -示例: - -``` -MySQL> SELECT * FROM table1 LIMIT 3; -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 2 | 1 | 'grace' | 2 | -| 5 | 3 | 'helen' | 3 | -| 3 | 2 | 'tom' | 2 | -+--------+----------+----------+------+ -3 rows in set (0.01 sec) - -MySQL> SELECT * FROM table1 ORDER BY citycode; -+--------+----------+----------+------+ -| siteid | citycode | username | pv | -+--------+----------+----------+------+ -| 2 | 1 | 'grace' | 2 | -| 1 | 1 | 'jim' | 2 | -| 3 | 2 | 'tom' | 2 | -| 4 | 3 | 'bush' | 3 | -| 5 | 3 | 'helen' | 3 | -+--------+----------+----------+------+ -5 rows in set (0.01 sec) -``` - -### 3.3 Join 查询 - -示例: - -``` -MySQL> SELECT SUM(table1.pv) FROM table1 JOIN table2 WHERE table1.siteid = table2.siteid; -+--------------------+ -| sum(`table1`.`pv`) | -+--------------------+ -| 12 | -+--------------------+ -1 row in set (0.20 sec) -``` - -### 3.4 子查询 - -示例: - -``` -MySQL> SELECT SUM(pv) FROM table2 WHERE siteid IN (SELECT siteid FROM table1 WHERE siteid > 2); -+-----------+ -| sum(`pv`) | -+-----------+ -| 8 | -+-----------+ -1 row in set (0.13 sec) -``` diff --git a/docs/zh-CN/getting-started/best-practice.md b/docs/zh-CN/getting-started/best-practice.md deleted file mode 100644 index a51adfa88b..0000000000 --- a/docs/zh-CN/getting-started/best-practice.md +++ /dev/null @@ -1,197 +0,0 @@ ---- -{ - "title": "最佳实践", - "language": "zh-CN" -} ---- - - - -# 最佳实践 - -## 1 建表 - -### 1.1 数据模型选择 - -Doris 数据模型上目前分为三类: AGGREGATE KEY, UNIQUE KEY, DUPLICATE KEY。三种模型中数据都是按KEY进行排序。 - -1.1.1 AGGREGATE KEY - - AGGREGATE KEY相同时,新旧记录进行聚合,目前支持的聚合函数有SUM, MIN, MAX, REPLACE。 - - AGGREGATE KEY模型可以提前聚合数据, 适合报表和多维分析业务。 - - ``` - CREATE TABLE site_visit - ( - siteid INT, - city SMALLINT, - username VARCHAR(32), - pv BIGINT SUM DEFAULT '0' - ) - AGGREGATE KEY(siteid, city, username) - DISTRIBUTED BY HASH(siteid) BUCKETS 10; - ``` - -1.1.2. UNIQUE KEY - - UNIQUE KEY 相同时,新记录覆盖旧记录。目前 UNIQUE KEY 实现上和 AGGREGATE KEY 的 REPLACE 聚合方法一样,二者本质上相同。适用于有更新需求的分析业务。 - - ``` - CREATE TABLE sales_order - ( - orderid BIGINT, - status TINYINT, - username VARCHAR(32), - amount BIGINT DEFAULT '0' - ) - UNIQUE KEY(orderid) - DISTRIBUTED BY HASH(orderid) BUCKETS 10; - ``` - -1.1.3. 
DUPLICATE KEY - - 只指定排序列,相同的行不会合并。适用于数据无需提前聚合的分析业务。 - - ``` - CREATE TABLE session_data - ( - visitorid SMALLINT, - sessionid BIGINT, - visittime DATETIME, - city CHAR(20), - province CHAR(20), - ip varchar(32), - brower CHAR(20), - url VARCHAR(1024) - ) - DUPLICATE KEY(visitorid, sessionid) - DISTRIBUTED BY HASH(sessionid, visitorid) BUCKETS 10; - ``` - -### 1.2 大宽表与 Star Schema - -业务方建表时, 为了和前端业务适配, 往往不对维度信息和指标信息加以区分, 而将 Schema 定义成大宽表。对于 Doris 而言, 这类大宽表往往性能不尽如人意: - -* Schema 中字段数比较多, 聚合模型中可能 key 列比较多, 导入过程中需要排序的列会增加。 -* 维度信息更新会反应到整张表中,而更新的频率直接影响查询的效率。 - -使用过程中,建议用户尽量使用 Star Schema 区分维度表和指标表。频繁更新的维度表也可以放在 MySQL 外部表中。而如果只有少量更新, 可以直接放在 Doris 中。在 Doris 中存储维度表时,可对维度表设置更多的副本,提升 Join 的性能。 - -### 1.3 分区和分桶 - -Doris 支持两级分区存储, 第一层为分区(partition),目前支持 RANGE 分区和 LIST 分区两种类型, 第二层为 HASH 分桶(bucket)。 - -1.3.1. 分区(partition) - - 分区用于将数据划分成不同区间, 逻辑上可以理解为将原始表划分成了多个子表。可以方便的按分区对数据进行管理,例如,删除数据时,更加迅速。 - -1.3.1.1. RANGE分区 - - 业务上,多数用户会选择采用按时间进行partition, 让时间进行partition有以下好处: - - * 可区分冷热数据 - * 可用上Doris分级存储(SSD + SATA)的功能 - -1.3.1.2. LIST分区 - - 业务上,用户可以选择城市或者其他枚举值进行partition。 - - -1.3.2. HASH分桶(bucket) - - 根据hash值将数据划分成不同的 bucket。 - - * 建议采用区分度大的列做分桶, 避免出现数据倾斜 - * 为方便数据恢复, 建议单个 bucket 的 size 不要太大, 保持在 10GB 以内, 所以建表或增加 partition 时请合理考虑 bucket 数目, 其中不同 partition 可指定不同的 buckets 数。 - -### 1.4 稀疏索引和 Bloom Filter - -Doris对数据进行有序存储, 在数据有序的基础上为其建立稀疏索引,索引粒度为 block(1024行)。 - -稀疏索引选取 schema 中固定长度的前缀作为索引内容, 目前 Doris 选取 36 个字节的前缀作为索引。 - -* 建表时建议将查询中常见的过滤字段放在 Schema 的前面, 区分度越大,频次越高的查询字段越往前放。 -* 这其中有一个特殊的地方,就是 varchar 类型的字段。varchar 类型字段只能作为稀疏索引的最后一个字段。索引会在 varchar 处截断, 因此 varchar 如果出现在前面,可能索引的长度可能不足 36 个字节。具体可以参阅 [数据模型、ROLLUP 及前缀索引](./data-model-rollup.md)。 -* 除稀疏索引之外, Doris还提供bloomfilter索引, bloomfilter索引对区分度比较大的列过滤效果明显。 如果考虑到varchar不能放在稀疏索引中, 可以建立bloomfilter索引。 - -### 1.5 物化视图(rollup) - -Rollup 本质上可以理解为原始表(Base Table)的一个物化索引。建立 Rollup 时可只选取 Base Table 中的部分列作为 Schema。Schema 中的字段顺序也可与 Base Table 不同。 - -下列情形可以考虑建立 Rollup: - -1.5.1. Base Table 中数据聚合度不高。 - -这一般是因 Base Table 有区分度比较大的字段而导致。此时可以考虑选取部分列,建立 Rollup。 - -如对于 `site_visit` 表: - -``` -site_visit(siteid, city, username, pv) -``` - -siteid 可能导致数据聚合度不高,如果业务方经常根据城市统计pv需求,可以建立一个只有 city, pv 的 Rollup: - -``` -ALTER TABLE site_visit ADD ROLLUP rollup_city(city, pv); -``` - -1.5.2. Base Table 中的前缀索引无法命中 - -这一般是 Base Table 的建表方式无法覆盖所有的查询模式。此时可以考虑调整列顺序,建立 Rollup。 - -如对于 session_data 表: - -``` -session_data(visitorid, sessionid, visittime, city, province, ip, brower, url) -``` - -如果除了通过 visitorid 分析访问情况外,还有通过 brower, province 分析的情形,可以单独建立 Rollup。 - -``` -ALTER TABLE session_data ADD ROLLUP rollup_brower(brower,province,ip,url) DUPLICATE KEY(brower,province); -``` - -## 2 Schema Change - -Doris中目前进行 Schema Change 的方式有三种:Sorted Schema Change,Direct Schema Change, Linked Schema Change。 - -2.1. Sorted Schema Change - - 改变了列的排序方式,需对数据进行重新排序。例如删除排序列中的一列, 字段重排序。 - - ``` - ALTER TABLE site_visit DROP COLUMN city; - ``` - -2.2. Direct Schema Change: 无需重新排序,但是需要对数据做一次转换。例如修改列的类型,在稀疏索引中加一列等。 - - ``` - ALTER TABLE site_visit MODIFY COLUMN username varchar(64); - ``` - -2.3. 
Linked Schema Change: 无需转换数据,直接完成。例如加列操作。 - - ``` - ALTER TABLE site_visit ADD COLUMN click bigint SUM default '0'; - ``` - -建表时建议考虑好 Schema,这样在进行 Schema Change 时可以加快速度。 diff --git a/docs/zh-CN/getting-started/data-model-rollup.md b/docs/zh-CN/getting-started/data-model-rollup.md deleted file mode 100644 index eda5f075ec..0000000000 --- a/docs/zh-CN/getting-started/data-model-rollup.md +++ /dev/null @@ -1,638 +0,0 @@ ---- -{ - "title": "数据模型、ROLLUP 及前缀索引", - "language": "zh-CN" -} ---- - - - -# 数据模型、ROLLUP 及前缀索引 - -本文档主要从逻辑层面,描述 Doris 的数据模型、 ROLLUP 以及前缀索引的概念,以帮助用户更好的使用 Doris 应对不同的业务场景。 - -## 基本概念 - -在 Doris 中,数据以表(Table)的形式进行逻辑上的描述。 -一张表包括行(Row)和列(Column)。Row 即用户的一行数据。Column 用于描述一行数据中不同的字段。 - -Column 可以分为两大类:Key 和 Value。从业务角度看,Key 和 Value 可以分别对应维度列和指标列。 - -Doris 的数据模型主要分为3类: - -* Aggregate -* Unique -* Duplicate - -下面我们分别介绍。 - -## Aggregate 模型 - -我们以实际的例子来说明什么是聚合模型,以及如何正确的使用聚合模型。 - -### 示例1:导入数据聚合 - -假设业务有如下数据表模式: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -|user\_id|LARGEINT||用户id| -|date|DATE||数据灌入日期| -|city|VARCHAR(20)||用户所在城市| -|age|SMALLINT||用户年龄| -|sex|TINYINT||用户性别| -|last_visit_date|DATETIME|REPLACE|用户最后一次访问时间| -|cost|BIGINT|SUM|用户总消费| -|max\_dwell\_time|INT|MAX|用户最大停留时间| -|min\_dwell\_time|INT|MIN|用户最小停留时间| - -如果转换成建表语句则如下(省略建表语句中的 Partition 和 Distribution 信息) - -``` -CREATE TABLE IF NOT EXISTS example_db.expamle_tbl -( - `user_id` LARGEINT NOT NULL COMMENT "用户id", - `date` DATE NOT NULL COMMENT "数据灌入日期时间", - `city` VARCHAR(20) COMMENT "用户所在城市", - `age` SMALLINT COMMENT "用户年龄", - `sex` TINYINT COMMENT "用户性别", - `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间", - `cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费", - `max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间", - `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间" -) -AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`) -... /* 省略 Partition 和 Distribution 信息 */ -; -``` - -可以看到,这是一个典型的用户信息和访问行为的事实表。 -在一般星型模型中,用户信息和访问行为一般分别存放在维度表和事实表中。这里我们为了更加方便的解释 Doris 的数据模型,将两部分信息统一存放在一张表中。 - -表中的列按照是否设置了 `AggregationType`,分为 Key (维度列) 和 Value(指标列)。没有设置 `AggregationType` 的,如 `user_id`、`date`、`age` ... 等称为 **Key**,而设置了 `AggregationType` 的称为 **Value**。 - -当我们导入数据时,对于 Key 列相同的行会聚合成一行,而 Value 列会按照设置的 `AggregationType` 进行聚合。 `AggregationType` 目前有以下四种聚合方式: - -1. SUM:求和,多行的 Value 进行累加。 -2. REPLACE:替代,下一批数据中的 Value 会替换之前导入过的行中的 Value。 -3. MAX:保留最大值。 -4. 
MIN:保留最小值。 - -假设我们有以下导入数据(原始数据): - -|user\_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -|10000|2017-10-01|北京|20|0|2017-10-01 06:00:00|20|10|10| -|10000|2017-10-01|北京|20|0|2017-10-01 07:00:00|15|2|2| -|10001|2017-10-01|北京|30|1|2017-10-01 17:05:45|2|22|22| -|10002|2017-10-02|上海|20|1|2017-10-02 12:59:12|200|5|5| -|10003|2017-10-02|广州|32|0|2017-10-02 11:20:00|30|11|11| -|10004|2017-10-01|深圳|35|0|2017-10-01 10:00:15|100|3|3| -|10004|2017-10-03|深圳|35|0|2017-10-03 10:20:22|11|6|6| - -我们假设这是一张记录用户访问某商品页面行为的表。我们以第一行数据为例,解释如下: - -|数据|说明| -|---|---| -|10000|用户id,每个用户唯一识别id| -|2017-10-01|数据入库时间,精确到日期| -|北京|用户所在城市| -|20|用户年龄| -|0|性别男(1 代表女性)| -|2017-10-01 06:00:00|用户本次访问该页面的时间,精确到秒| -|20|用户本次访问产生的消费| -|10|用户本次访问,驻留该页面的时间| -|10|用户本次访问,驻留该页面的时间(冗余)| - -那么当这批数据正确导入到 Doris 中后,Doris 中最终存储如下: - -|user\_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -|10000|2017-10-01|北京|20|0|2017-10-01 07:00:00|35|10|2| -|10001|2017-10-01|北京|30|1|2017-10-01 17:05:45|2|22|22| -|10002|2017-10-02|上海|20|1|2017-10-02 12:59:12|200|5|5| -|10003|2017-10-02|广州|32|0|2017-10-02 11:20:00|30|11|11| -|10004|2017-10-01|深圳|35|0|2017-10-01 10:00:15|100|3|3| -|10004|2017-10-03|深圳|35|0|2017-10-03 10:20:22|11|6|6| - -可以看到,用户 10000 只剩下了一行**聚合后**的数据。而其余用户的数据和原始数据保持一致。这里先解释下用户 10000 聚合后的数据: - -前5列没有变化,从第6列 `last_visit_date` 开始: - -* `2017-10-01 07:00:00`:因为 `last_visit_date` 列的聚合方式为 REPLACE,所以 `2017-10-01 07:00:00` 替换了 `2017-10-01 06:00:00` 保存了下来。 - > 注:在同一个导入批次中的数据,对于 REPLACE 这种聚合方式,替换顺序不做保证。如在这个例子中,最终保存下来的,也有可能是 `2017-10-01 06:00:00`。而对于不同导入批次中的数据,可以保证,后一批次的数据会替换前一批次。 - -* `35`:因为 `cost` 列的聚合类型为 SUM,所以由 20 + 15 累加获得 35。 -* `10`:因为 `max_dwell_time` 列的聚合类型为 MAX,所以 10 和 2 取最大值,获得 10。 -* `2`:因为 `min_dwell_time` 列的聚合类型为 MIN,所以 10 和 2 取最小值,获得 2。 - -经过聚合,Doris 中最终只会存储聚合后的数据。换句话说,即明细数据会丢失,用户不能够再查询到聚合前的明细数据了。 - -### 示例2:保留明细数据 - -接示例1,我们将表结构修改如下: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -|user\_id|LARGEINT||用户id| -|date|DATE||数据灌入日期| -|timestamp|DATETIME||数据灌入时间,精确到秒| -|city|VARCHAR(20)||用户所在城市| -|age|SMALLINT||用户年龄| -|sex|TINYINT||用户性别| -|last\_visit\_date|DATETIME|REPLACE|用户最后一次访问时间| -|cost|BIGINT|SUM|用户总消费| -|max\_dwell\_time|INT|MAX|用户最大停留时间| -|min\_dwell\_time|INT|MIN|用户最小停留时间| - -即增加了一列 `timestamp`,记录精确到秒的数据灌入时间。 - -导入数据如下: - -|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---|---| -|10000|2017-10-01|2017-10-01 08:00:05|北京|20|0|2017-10-01 06:00:00|20|10|10| -|10000|2017-10-01|2017-10-01 09:00:05|北京|20|0|2017-10-01 07:00:00|15|2|2| -|10001|2017-10-01|2017-10-01 18:12:10|北京|30|1|2017-10-01 17:05:45|2|22|22| -|10002|2017-10-02|2017-10-02 13:10:00|上海|20|1|2017-10-02 12:59:12|200|5|5| -|10003|2017-10-02|2017-10-02 13:15:00|广州|32|0|2017-10-02 11:20:00|30|11|11| -|10004|2017-10-01|2017-10-01 12:12:48|深圳|35|0|2017-10-01 10:00:15|100|3|3| -|10004|2017-10-03|2017-10-03 12:38:20|深圳|35|0|2017-10-03 10:20:22|11|6|6| - -那么当这批数据正确导入到 Doris 中后,Doris 中最终存储如下: - -|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---|---| -|10000|2017-10-01|2017-10-01 08:00:05|北京|20|0|2017-10-01 06:00:00|20|10|10| -|10000|2017-10-01|2017-10-01 09:00:05|北京|20|0|2017-10-01 07:00:00|15|2|2| -|10001|2017-10-01|2017-10-01 18:12:10|北京|30|1|2017-10-01 17:05:45|2|22|22| -|10002|2017-10-02|2017-10-02 13:10:00|上海|20|1|2017-10-02 12:59:12|200|5|5| 
-|10003|2017-10-02|2017-10-02 13:15:00|广州|32|0|2017-10-02 11:20:00|30|11|11| -|10004|2017-10-01|2017-10-01 12:12:48|深圳|35|0|2017-10-01 10:00:15|100|3|3| -|10004|2017-10-03|2017-10-03 12:38:20|深圳|35|0|2017-10-03 10:20:22|11|6|6| - -我们可以看到,存储的数据,和导入数据完全一样,没有发生任何聚合。这是因为,这批数据中,因为加入了 `timestamp` 列,所有行的 Key 都**不完全相同**。也就是说,只要保证导入的数据中,每一行的 Key 都不完全相同,那么即使在聚合模型下,Doris 也可以保存完整的明细数据。 - -### 示例3:导入数据与已有数据聚合 - -接示例1。假设现在表中已有数据如下: - -|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -|10000|2017-10-01|北京|20|0|2017-10-01 07:00:00|35|10|2| -|10001|2017-10-01|北京|30|1|2017-10-01 17:05:45|2|22|22| -|10002|2017-10-02|上海|20|1|2017-10-02 12:59:12|200|5|5| -|10003|2017-10-02|广州|32|0|2017-10-02 11:20:00|30|11|11| -|10004|2017-10-01|深圳|35|0|2017-10-01 10:00:15|100|3|3| -|10004|2017-10-03|深圳|35|0|2017-10-03 10:20:22|11|6|6| - -我们再导入一批新的数据: - -|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -|10004|2017-10-03|深圳|35|0|2017-10-03 11:22:00|44|19|19| -|10005|2017-10-03|长沙|29|1|2017-10-03 18:11:02|3|1|1| - -那么当这批数据正确导入到 Doris 中后,Doris 中最终存储如下: - -|user_id|date|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---| -|10000|2017-10-01|北京|20|0|2017-10-01 07:00:00|35|10|2| -|10001|2017-10-01|北京|30|1|2017-10-01 17:05:45|2|22|22| -|10002|2017-10-02|上海|20|1|2017-10-02 12:59:12|200|5|5| -|10003|2017-10-02|广州|32|0|2017-10-02 11:20:00|30|11|11| -|10004|2017-10-01|深圳|35|0|2017-10-01 10:00:15|100|3|3| -|10004|2017-10-03|深圳|35|0|2017-10-03 11:22:00|55|19|6| -|10005|2017-10-03|长沙|29|1|2017-10-03 18:11:02|3|1|1| - -可以看到,用户 10004 的已有数据和新导入的数据发生了聚合。同时新增了 10005 用户的数据。 - -数据的聚合,在 Doris 中有如下三个阶段发生: - -1. 每一批次数据导入的 ETL 阶段。该阶段会在每一批次导入的数据内部进行聚合。 -2. 底层 BE 进行数据 Compaction 的阶段。该阶段,BE 会对已导入的不同批次的数据进行进一步的聚合。 -3. 数据查询阶段。在数据查询时,对于查询涉及到的数据,会进行对应的聚合。 - -数据在不同时间,可能聚合的程度不一致。比如一批数据刚导入时,可能还未与之前已存在的数据进行聚合。但是对于用户而言,用户**只能查询到**聚合后的数据。即不同的聚合程度对于用户查询而言是透明的。用户需始终认为数据以**最终的完成的聚合程度**存在,而**不应假设某些聚合还未发生**。(可参阅**聚合模型的局限性**一节获得更多详情。) - -## Unique 模型 - -在某些多维分析场景下,用户更关注的是如何保证 Key 的唯一性,即如何获得 Primary Key 唯一性约束。因此,我们引入了 Unique 的数据模型。该模型本质上是聚合模型的一个特例,也是一种简化的表结构表示方式。我们举例说明。 - -|ColumnName|Type|IsKey|Comment| -|---|---|---|---| -|user_id|BIGINT|Yes|用户id| -|username|VARCHAR(50)|Yes|用户昵称| -|city|VARCHAR(20)|No|用户所在城市| -|age|SMALLINT|No|用户年龄| -|sex|TINYINT|No|用户性别| -|phone|LARGEINT|No|用户电话| -|address|VARCHAR(500)|No|用户住址| -|register_time|DATETIME|No|用户注册时间| - -这是一个典型的用户基础信息表。这类数据没有聚合需求,只需保证主键唯一性。(这里的主键为 user_id + username)。那么我们的建表语句如下: - -``` -CREATE TABLE IF NOT EXISTS example_db.expamle_tbl -( - `user_id` LARGEINT NOT NULL COMMENT "用户id", - `username` VARCHAR(50) NOT NULL COMMENT "用户昵称", - `city` VARCHAR(20) COMMENT "用户所在城市", - `age` SMALLINT COMMENT "用户年龄", - `sex` TINYINT COMMENT "用户性别", - `phone` LARGEINT COMMENT "用户电话", - `address` VARCHAR(500) COMMENT "用户地址", - `register_time` DATETIME COMMENT "用户注册时间" -) -UNIQUE KEY(`user_id`, `username`) -... 
/* 省略 Partition 和 Distribution 信息 */ -; -``` - -而这个表结构,完全同等于以下使用聚合模型描述的表结构: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -|user_id|BIGINT||用户id| -|username|VARCHAR(50)||用户昵称| -|city|VARCHAR(20)|REPLACE|用户所在城市| -|age|SMALLINT|REPLACE|用户年龄| -|sex|TINYINT|REPLACE|用户性别| -|phone|LARGEINT|REPLACE|用户电话| -|address|VARCHAR(500)|REPLACE|用户住址| -|register_time|DATETIME|REPLACE|用户注册时间| - -及建表语句: - -``` -CREATE TABLE IF NOT EXISTS example_db.expamle_tbl -( - `user_id` LARGEINT NOT NULL COMMENT "用户id", - `username` VARCHAR(50) NOT NULL COMMENT "用户昵称", - `city` VARCHAR(20) REPLACE COMMENT "用户所在城市", - `age` SMALLINT REPLACE COMMENT "用户年龄", - `sex` TINYINT REPLACE COMMENT "用户性别", - `phone` LARGEINT REPLACE COMMENT "用户电话", - `address` VARCHAR(500) REPLACE COMMENT "用户地址", - `register_time` DATETIME REPLACE COMMENT "用户注册时间" -) -AGGREGATE KEY(`user_id`, `username`) -... /* 省略 Partition 和 Distribution 信息 */ -; -``` - -即 Unique 模型完全可以用聚合模型中的 REPLACE 方式替代。其内部的实现方式和数据存储方式也完全一样。这里不再继续举例说明。 - -## Duplicate 模型 - -在某些多维分析场景下,数据既没有主键,也没有聚合需求。因此,我们引入 Duplicate 数据模型来满足这类需求。举例说明。 - -|ColumnName|Type|SortKey|Comment| -|---|---|---|---| -|timestamp|DATETIME|Yes|日志时间| -|type|INT|Yes|日志类型| -|error_code|INT|Yes|错误码| -|error_msg|VARCHAR(1024)|No|错误详细信息| -|op_id|BIGINT|No|负责人id| -|op_time|DATETIME|No|处理时间| - -建表语句如下: - -``` -CREATE TABLE IF NOT EXISTS example_db.expamle_tbl -( - `timestamp` DATETIME NOT NULL COMMENT "日志时间", - `type` INT NOT NULL COMMENT "日志类型", - `error_code` INT COMMENT "错误码", - `error_msg` VARCHAR(1024) COMMENT "错误详细信息", - `op_id` BIGINT COMMENT "负责人id", - `op_time` DATETIME COMMENT "处理时间" -) -DUPLICATE KEY(`timestamp`, `type`) -... /* 省略 Partition 和 Distribution 信息 */ -; -``` - -这种数据模型区别于 Aggregate 和 Unique 模型。数据完全按照导入文件中的数据进行存储,不会有任何聚合。即使两行数据完全相同,也都会保留。 -而在建表语句中指定的 DUPLICATE KEY,只是用来指明底层数据按照那些列进行排序。(更贴切的名称应该为 “Sorted Column”,这里取名 “DUPLICATE KEY” 只是用以明确表示所用的数据模型。关于 “Sorted Column”的更多解释,可以参阅 [前綴索引](https://doris.apache.org/zh-CN/getting-started/data-model-rollup.html#%E5%89%8D%E7%BC%80%E7%B4%A2%E5%BC%95) 小节。在 DUPLICATE KEY 的选择上,我们建议适当的选择前 2-4 列就可以。 - -这种数据模型适用于既没有聚合需求,又没有主键唯一性约束的原始数据的存储。更多使用场景,可参阅 [聚合模型的局限性](https://doris.apache.org/zh-CN/getting-started/data-model-rollup.html#%E8%81%9A%E5%90%88%E6%A8%A1%E5%9E%8B%E7%9A%84%E5%B1%80%E9%99%90%E6%80%A7) 小节。 - -## ROLLUP - -ROLLUP 在多维分析中是“上卷”的意思,即将数据按某种指定的粒度进行进一步聚合。 - -### 基本概念 - -在 Doris 中,我们将用户通过建表语句创建出来的表称为 Base 表(Base Table)。Base 表中保存着按用户建表语句指定的方式存储的基础数据。 - -在 Base 表之上,我们可以创建任意多个 ROLLUP 表。这些 ROLLUP 的数据是基于 Base 表产生的,并且在物理上是**独立存储**的。 - -ROLLUP 表的基本作用,在于在 Base 表的基础上,获得更粗粒度的聚合数据。 - -下面我们用示例详细说明在不同数据模型中的 ROLLUP 表及其作用。 - -#### Aggregate 和 Unique 模型中的 ROLLUP - -因为 Unique 只是 Aggregate 模型的一个特例,所以这里我们不加以区别。 - -1. 
示例1:获得每个用户的总消费 - -接 **Aggregate 模型**小节的**示例2**,Base 表结构如下: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -|user_id|LARGEINT||用户id| -|date|DATE||数据灌入日期| -|timestamp|DATETIME||数据灌入时间,精确到秒| -|city|VARCHAR(20)||用户所在城市| -|age|SMALLINT||用户年龄| -|sex|TINYINT||用户性别| -|last_visit_date|DATETIME|REPLACE|用户最后一次访问时间| -|cost|BIGINT|SUM|用户总消费| -|max\_dwell\_time|INT|MAX|用户最大停留时间| -|min\_dwell\_time|INT|MIN|用户最小停留时间| - -存储的数据如下: - -|user_id|date|timestamp|city|age|sex|last\_visit\_date|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---|---|---|---|---|---| -|10000|2017-10-01|2017-10-01 08:00:05|北京|20|0|2017-10-01 06:00:00|20|10|10| -|10000|2017-10-01|2017-10-01 09:00:05|北京|20|0|2017-10-01 07:00:00|15|2|2| -|10001|2017-10-01|2017-10-01 18:12:10|北京|30|1|2017-10-01 17:05:45|2|22|22| -|10002|2017-10-02|2017-10-02 13:10:00|上海|20|1|2017-10-02 12:59:12|200|5|5| -|10003|2017-10-02|2017-10-02 13:15:00|广州|32|0|2017-10-02 11:20:00|30|11|11| -|10004|2017-10-01|2017-10-01 12:12:48|深圳|35|0|2017-10-01 10:00:15|100|3|3| -|10004|2017-10-03|2017-10-03 12:38:20|深圳|35|0|2017-10-03 10:20:22|11|6|6| - -在此基础上,我们创建一个 ROLLUP: - -|ColumnName| -|---| -|user_id| -|cost| - -该 ROLLUP 只包含两列:user_id 和 cost。则创建完成后,该 ROLLUP 中存储的数据如下: - -|user\_id|cost| -|---|---| -|10000|35| -|10001|2| -|10002|200| -|10003|30| -|10004|111| - -可以看到,ROLLUP 中仅保留了每个 user_id,在 cost 列上的 SUM 的结果。那么当我们进行如下查询时: - -`SELECT user_id, sum(cost) FROM table GROUP BY user_id;` - -Doris 会自动命中这个 ROLLUP 表,从而只需扫描极少的数据量,即可完成这次聚合查询。 - -2. 示例2:获得不同城市,不同年龄段用户的总消费、最长和最短页面驻留时间 - -紧接示例1。我们在 Base 表基础之上,再创建一个 ROLLUP: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -|city|VARCHAR(20)||用户所在城市| -|age|SMALLINT||用户年龄| -|cost|BIGINT|SUM|用户总消费| -|max\_dwell\_time|INT|MAX|用户最大停留时间| -|min\_dwell\_time|INT|MIN|用户最小停留时间| - -则创建完成后,该 ROLLUP 中存储的数据如下: - -|city|age|cost|max\_dwell\_time|min\_dwell\_time| -|---|---|---|---|---| -|北京|20|35|10|2| -|北京|30|2|22|22| -|上海|20|200|5|5| -|广州|32|30|11|11| -|深圳|35|111|6|3| - -当我们进行如下这些查询时: - -* `SELECT city, age, sum(cost), max(max_dwell_time), min(min_dwell_time) FROM table GROUP BY city, age;` -* `SELECT city, sum(cost), max(max_dwell_time), min(min_dwell_time) FROM table GROUP BY city;` -* `SELECT city, age, sum(cost), min(min_dwell_time) FROM table GROUP BY city, age;` - -Doris 会自动命中这个 ROLLUP 表。 - -#### Duplicate 模型中的 ROLLUP - -因为 Duplicate 模型没有聚合的语意。所以该模型中的 ROLLUP,已经失去了“上卷”这一层含义。而仅仅是作为调整列顺序,以命中前缀索引的作用。我们将在接下来的小节中,详细介绍前缀索引,以及如何使用ROLLUP改变前缀索引,以获得更好的查询效率。 - -### 前缀索引与 ROLLUP - -#### 前缀索引 - -不同于传统的数据库设计,Doris 不支持在任意列上创建索引。Doris 这类 MPP 架构的 OLAP 数据库,通常都是通过提高并发,来处理大量数据的。 -本质上,Doris 的数据存储在类似 SSTable(Sorted String Table)的数据结构中。该结构是一种有序的数据结构,可以按照指定的列进行排序存储。在这种数据结构上,以排序列作为条件进行查找,会非常的高效。 - -在 Aggregate、Unique 和 Duplicate 三种数据模型中。底层的数据存储,是按照各自建表语句中,AGGREGATE KEY、UNIQUE KEY 和 DUPLICATE KEY 中指定的列进行排序存储的。 - -而前缀索引,即在排序的基础上,实现的一种根据给定前缀列,快速查询数据的索引方式。 - -我们将一行数据的前 **36 个字节** 作为这行数据的前缀索引。当遇到 VARCHAR 类型时,前缀索引会直接截断。我们举例说明: - -1. 以下表结构的前缀索引为 user_id(8 Bytes) + age(4 Bytes) + message(prefix 20 Bytes)。 - -|ColumnName|Type| -|---|---| -|user_id|BIGINT| -|age|INT| -|message|VARCHAR(100)| -|max\_dwell\_time|DATETIME| -|min\_dwell\_time|DATETIME| - -2. 
以下表结构的前缀索引为 user_name(20 Bytes)。即使没有达到 36 个字节,因为遇到 VARCHAR,所以直接截断,不再往后继续。 - -|ColumnName|Type| -|---|---| -|user_name|VARCHAR(20)| -|age|INT| -|message|VARCHAR(100)| -|max\_dwell\_time|DATETIME| -|min\_dwell\_time|DATETIME| - -当我们的查询条件,是**前缀索引的前缀**时,可以极大的加快查询速度。比如在第一个例子中,我们执行如下查询: - -`SELECT * FROM table WHERE user_id=1829239 and age=20;` - -该查询的效率会**远高于**如下查询: - -`SELECT * FROM table WHERE age=20;` - -所以在建表时,**正确的选择列顺序,能够极大地提高查询效率**。 - -#### ROLLUP 调整前缀索引 - -因为建表时已经指定了列顺序,所以一个表只有一种前缀索引。这对于使用其他不能命中前缀索引的列作为条件进行的查询来说,效率上可能无法满足需求。因此,我们可以通过创建 ROLLUP 来人为的调整列顺序。举例说明。 - -Base 表结构如下: - -|ColumnName|Type| -|---|---| -|user\_id|BIGINT| -|age|INT| -|message|VARCHAR(100)| -|max\_dwell\_time|DATETIME| -|min\_dwell\_time|DATETIME| - -我们可以在此基础上创建一个 ROLLUP 表: - -|ColumnName|Type| -|---|---| -|age|INT| -|user\_id|BIGINT| -|message|VARCHAR(100)| -|max\_dwell\_time|DATETIME| -|min\_dwell\_time|DATETIME| - -可以看到,ROLLUP 和 Base 表的列完全一样,只是将 user_id 和 age 的顺序调换了。那么当我们进行如下查询时: - -`SELECT * FROM table where age=20 and message LIKE "%error%";` - -会优先选择 ROLLUP 表,因为 ROLLUP 的前缀索引匹配度更高。 - -### ROLLUP 的几点说明 - -* ROLLUP 最根本的作用是提高某些查询的查询效率(无论是通过聚合来减少数据量,还是修改列顺序以匹配前缀索引)。因此 ROLLUP 的含义已经超出了 “上卷” 的范围。这也是为什么我们在源代码中,将其命名为 Materialized Index(物化索引)的原因。 -* ROLLUP 是附属于 Base 表的,可以看做是 Base 表的一种辅助数据结构。用户可以在 Base 表的基础上,创建或删除 ROLLUP,但是不能在查询中显式的指定查询某 ROLLUP。是否命中 ROLLUP 完全由 Doris 系统自动决定。 -* ROLLUP 的数据是独立物理存储的。因此,创建的 ROLLUP 越多,占用的磁盘空间也就越大。同时对导入速度也会有影响(导入的ETL阶段会自动产生所有 ROLLUP 的数据),但是不会降低查询效率(只会更好)。 -* ROLLUP 的数据更新与 Base 表是完全同步的。用户无需关心这个问题。 -* ROLLUP 中列的聚合方式,与 Base 表完全相同。在创建 ROLLUP 无需指定,也不能修改。 -* 查询能否命中 ROLLUP 的一个必要条件(非充分条件)是,查询所涉及的**所有列**(包括 select list 和 where 中的查询条件列等)都存在于该 ROLLUP 的列中。否则,查询只能命中 Base 表。 -* 某些类型的查询(如 count(*))在任何条件下,都无法命中 ROLLUP。具体参见接下来的 **聚合模型的局限性** 一节。 -* 可以通过 `EXPLAIN your_sql;` 命令获得查询执行计划,在执行计划中,查看是否命中 ROLLUP。 -* 可以通过 `DESC tbl_name ALL;` 语句显示 Base 表和所有已创建完成的 ROLLUP。 - -在这篇文档中可以查看 [查询如何命中 Rollup](hit-the-rollup) - -## 聚合模型的局限性 - -这里我们针对 Aggregate 模型(包括 Unique 模型),来介绍下聚合模型的局限性。 - -在聚合模型中,模型对外展现的,是**最终聚合后的**数据。也就是说,任何还未聚合的数据(比如说两个不同导入批次的数据),必须通过某种方式,以保证对外展示的一致性。我们举例说明。 - -假设表结构如下: - -|ColumnName|Type|AggregationType|Comment| -|---|---|---|---| -|user\_id|LARGEINT||用户id| -|date|DATE||数据灌入日期| -|cost|BIGINT|SUM|用户总消费| - -假设存储引擎中有如下两个已经导入完成的批次的数据: - -**batch 1** - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|50| -|10002|2017-11-21|39| - -**batch 2** - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|1| -|10001|2017-11-21|5| -|10003|2017-11-22|22| - -可以看到,用户 10001 分属在两个导入批次中的数据还没有聚合。但是为了保证用户只能查询到如下最终聚合后的数据: - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|51| -|10001|2017-11-21|5| -|10002|2017-11-21|39| -|10003|2017-11-22|22| - -我们在查询引擎中加入了聚合算子,来保证数据对外的一致性。 - -另外,在聚合列(Value)上,执行与聚合类型不一致的聚合类查询时,要注意语意。比如我们在如上示例中执行如下查询: - -`SELECT MIN(cost) FROM table;` - -得到的结果是 5,而不是 1。 - -同时,这种一致性保证,在某些查询中,会极大的降低查询效率。 - -我们以最基本的 count(*) 查询为例: - -`SELECT COUNT(*) FROM table;` - -在其他数据库中,这类查询都会很快的返回结果。因为在实现上,我们可以通过如“导入时对行进行计数,保存 count 的统计信息”,或者在查询时“仅扫描某一列数据,获得 count 值”的方式,只需很小的开销,即可获得查询结果。但是在 Doris 的聚合模型中,这种查询的开销**非常大**。 - -我们以刚才的数据为例: - -**batch 1** - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|50| -|10002|2017-11-21|39| - -**batch 2** - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|1| -|10001|2017-11-21|5| -|10003|2017-11-22|22| - -因为最终的聚合结果为: - -|user\_id|date|cost| -|---|---|---| -|10001|2017-11-20|51| -|10001|2017-11-21|5| -|10002|2017-11-21|39| -|10003|2017-11-22|22| - -所以,`select count(*) from table;` 的正确结果应该为 **4**。但如果我们只扫描 `user_id` 这一列,如果加上查询时聚合,最终得到的结果是 **3**(10001, 10002, 
10003)。而如果不加查询时聚合,则得到的结果是 **5**(两批次一共5行数据)。可见这两个结果都是不对的。 - -为了得到正确的结果,我们必须同时读取 `user_id` 和 `date` 这两列的数据,**再加上查询时聚合**,才能返回 **4** 这个正确的结果。也就是说,在 count(\*) 查询中,Doris 必须扫描所有的 AGGREGATE KEY 列(这里就是 `user_id` 和 `date`),并且聚合后,才能得到语意正确的结果。当聚合列非常多时,count(\*) 查询需要扫描大量的数据。 - -因此,当业务上有频繁的 count(\*) 查询时,我们建议用户通过增加一个**值恒为 1 的,聚合类型为 SUM 的列来模拟 count(\*)**。如刚才的例子中的表结构,我们修改如下: - -|ColumnName|Type|AggregateType|Comment| -|---|---|---|---| -|user\_id|BIGINT||用户id| -|date|DATE||数据灌入日期| -|cost|BIGINT|SUM|用户总消费| -|count|BIGINT|SUM|用于计算count| - -增加一个 count 列,并且导入数据中,该列值**恒为 1**。则 `select count(*) from table;` 的结果等价于 `select sum(count) from table;`。而后者的查询效率将远高于前者。不过这种方式也有使用限制,就是用户需要自行保证,不会重复导入 AGGREGATE KEY 列都相同的行。否则,`select sum(count) from table;` 只能表述原始导入的行数,而不是 `select count(*) from table;` 的语义。 - -另一种方式,就是 **将如上的 `count` 列的聚合类型改为 REPLACE,且依然值恒为 1**。那么 `select sum(count) from table;` 和 `select count(*) from table;` 的结果将是一致的。并且这种方式,没有导入重复行的限制。 - -### Duplicate 模型 - -Duplicate 模型没有聚合模型的这个局限性。因为该模型不涉及聚合语意,在做 count(*) 查询时,任意选择一列查询,即可得到语意正确的结果。 - -## 数据模型的选择建议 - -因为数据模型在建表时就已经确定,且**无法修改**。所以,选择一个合适的数据模型**非常重要**。 - -1. Aggregate 模型可以通过预聚合,极大地降低聚合查询时所需扫描的数据量和查询的计算量,非常适合有固定模式的报表类查询场景。但是该模型对 count(*) 查询很不友好。同时因为固定了 Value 列上的聚合方式,在进行其他类型的聚合查询时,需要考虑语意正确性。 -2. Unique 模型针对需要唯一主键约束的场景,可以保证主键唯一性约束。但是无法利用 ROLLUP 等预聚合带来的查询优势(因为本质是 REPLACE,没有 SUM 这种聚合方式)。 -3. Duplicate 适合任意维度的 Ad-hoc 查询。虽然同样无法利用预聚合的特性,但是不受聚合模型的约束,可以发挥列存模型的优势(只读取相关列,而不需要读取所有 Key 列)。 diff --git a/docs/zh-CN/getting-started/data-partition.md b/docs/zh-CN/getting-started/data-partition.md deleted file mode 100644 index 7f5a75f12d..0000000000 --- a/docs/zh-CN/getting-started/data-partition.md +++ /dev/null @@ -1,401 +0,0 @@ ---- -{ - "title": "数据划分", - "language": "zh-CN" -} ---- - - - -# 数据划分 - -本文档主要介绍 Doris 的建表和数据划分,以及建表操作中可能遇到的问题和解决方法。 - -## 基本概念 - -在 Doris 中,数据都以表(Table)的形式进行逻辑上的描述。 - -### Row & Column - -一张表包括行(Row)和列(Column)。Row 即用户的一行数据。Column 用于描述一行数据中不同的字段。 - -Column 可以分为两大类:Key 和 Value。从业务角度看,Key 和 Value 可以分别对应维度列和指标列。从聚合模型的角度来说,Key 列相同的行,会聚合成一行。其中 Value 列的聚合方式由用户在建表时指定。关于更多聚合模型的介绍,可以参阅 [Doris 数据模型](./data-model-rollup.md)。 - -### Tablet & Partition - -在 Doris 的存储引擎中,用户数据被水平划分为若干个数据分片(Tablet,也称作数据分桶)。每个 Tablet 包含若干数据行。各个 Tablet 之间的数据没有交集,并且在物理上是独立存储的。 - -多个 Tablet 在逻辑上归属于不同的分区(Partition)。一个 Tablet 只属于一个 Partition。而一个 Partition 包含若干个 Tablet。因为 Tablet 在物理上是独立存储的,所以可以视为 Partition 在物理上也是独立。Tablet 是数据移动、复制等操作的最小物理存储单元。 - -若干个 Partition 组成一个 Table。Partition 可以视为是逻辑上最小的管理单元。数据的导入与删除,都可以或仅能针对一个 Partition 进行。 - -## 数据划分 - -我们以一个建表操作来说明 Doris 的数据划分。 - -Doris 的建表是一个同步命令,命令返回成功,即表示建表成功。 - -可以通过 `HELP CREATE TABLE;` 查看更多帮助。 - -本小节通过一个例子,来介绍 Doris 的建表方式。 - -``` --- Range Partition - -CREATE TABLE IF NOT EXISTS example_db.expamle_range_tbl -( - `user_id` LARGEINT NOT NULL COMMENT "用户id", - `date` DATE NOT NULL COMMENT "数据灌入日期时间", - `timestamp` DATETIME NOT NULL COMMENT "数据灌入的时间戳", - `city` VARCHAR(20) COMMENT "用户所在城市", - `age` SMALLINT COMMENT "用户年龄", - `sex` TINYINT COMMENT "用户性别", - `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间", - `cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费", - `max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间", - `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间" -) -ENGINE=olap -AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`) -PARTITION BY RANGE(`date`) -( - PARTITION `p201701` VALUES LESS THAN ("2017-02-01"), - PARTITION `p201702` VALUES LESS THAN ("2017-03-01"), - PARTITION `p201703` VALUES LESS THAN ("2017-04-01") -) -DISTRIBUTED BY HASH(`user_id`) BUCKETS 16 -PROPERTIES -( 
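-    /* The property values below are example values; all three properties are optional
-       and are explained in the PROPERTIES section later in this document:
-       replication_num is the per-tablet replica count (default 3),
-       storage_medium is the initial storage medium of the partitions, and
-       storage_cooldown_time is when data on SSD migrates to HDD. */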
- "replication_num" = "3", - "storage_medium" = "SSD", - "storage_cooldown_time" = "2018-01-01 12:00:00" -); - - --- List Partition - -CREATE TABLE IF NOT EXISTS example_db.expamle_list_tbl -( - `user_id` LARGEINT NOT NULL COMMENT "用户id", - `date` DATE NOT NULL COMMENT "数据灌入日期时间", - `timestamp` DATETIME NOT NULL COMMENT "数据灌入的时间戳", - `city` VARCHAR(20) COMMENT "用户所在城市", - `age` SMALLINT COMMENT "用户年龄", - `sex` TINYINT COMMENT "用户性别", - `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间", - `cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费", - `max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间", - `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间" -) -ENGINE=olap -AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`) -PARTITION BY LIST(`city`) -( - PARTITION `p_cn` VALUES IN ("Beijing", "Shanghai", "Hong Kong"), - PARTITION `p_usa` VALUES IN ("New York", "San Francisco"), - PARTITION `p_jp` VALUES IN ("Tokyo") -) -DISTRIBUTED BY HASH(`user_id`) BUCKETS 16 -PROPERTIES -( - "replication_num" = "3", - "storage_medium" = "SSD", - "storage_cooldown_time" = "2018-01-01 12:00:00" -); - -``` - -### 列定义 - -这里我们只以 AGGREGATE KEY 数据模型为例进行说明。更多数据模型参阅 [Doris 数据模型](./data-model-rollup.md)。 - -列的基本类型,可以通过在 mysql-client 中执行 `HELP CREATE TABLE;` 查看。 - -AGGREGATE KEY 数据模型中,所有没有指定聚合方式(SUM、REPLACE、MAX、MIN)的列视为 Key 列。而其余则为 Value 列。 - -定义列时,可参照如下建议: - -1. Key 列必须在所有 Value 列之前。 -2. 尽量选择整型类型。因为整型类型的计算和查找比较效率远高于字符串。 -3. 对于不同长度的整型类型的选择原则,遵循 **够用即可**。 -4. 对于 VARCHAR 和 STRING 类型的长度,遵循 **够用即可**。 -5. 所有列的总字节长度(包括 Key 和 Value)不能超过 100KB。 - -### 分区与分桶 - -Doris 支持两层的数据划分。第一层是 Partition,支持 Range 和 List 的划分方式。第二层是 Bucket(Tablet),仅支持 Hash 的划分方式。 - -也可以仅使用一层分区。使用一层分区时,只支持 Bucket 划分。 - -1. Partition - - * Partition 列可以指定一列或多列。分区类必须为 KEY 列。多列分区的使用方式在后面 **多列分区** 小结介绍。 - * 不论分区列是什么类型,在写分区值时,都需要加双引号。 - * 分区数量理论上没有上限。 - * 当不使用 Partition 建表时,系统会自动生成一个和表名同名的,全值范围的 Partition。该 Partition 对用户不可见,并且不可删改。 - - #### Range 分区 - - * 分区列通常为时间列,以方便的管理新旧数据。 - * Partition 支持通过 `VALUES LESS THAN (...)` 仅指定上界,系统会将前一个分区的上界作为该分区的下界,生成一个左闭右开的区间。通过,也支持通过 `VALUES [...)` 指定同时指定上下界,生成一个左闭右开的区间。 - - * 通过 `VALUES [...)` 同时指定上下界比较容易理解。这里举例说明,当使用 `VALUES LESS THAN (...)` 语句进行分区的增删操作时,分区范围的变化情况: - - * 如上 `expamle_range_tbl` 示例,当建表完成后,会自动生成如下3个分区: - - ``` - p201701: [MIN_VALUE, 2017-02-01) - p201702: [2017-02-01, 2017-03-01) - p201703: [2017-03-01, 2017-04-01) - ``` - - * 当我们增加一个分区 p201705 VALUES LESS THAN ("2017-06-01"),分区结果如下: - - ``` - p201701: [MIN_VALUE, 2017-02-01) - p201702: [2017-02-01, 2017-03-01) - p201703: [2017-03-01, 2017-04-01) - p201705: [2017-04-01, 2017-06-01) - ``` - - * 此时我们删除分区 p201703,则分区结果如下: - - ``` - p201701: [MIN_VALUE, 2017-02-01) - p201702: [2017-02-01, 2017-03-01) - p201705: [2017-04-01, 2017-06-01) - ``` - - > 注意到 p201702 和 p201705 的分区范围并没有发生变化,而这两个分区之间,出现了一个空洞:[2017-03-01, 2017-04-01)。即如果导入的数据范围在这个空洞范围内,是无法导入的。 - - * 继续删除分区 p201702,分区结果如下: - - ``` - p201701: [MIN_VALUE, 2017-02-01) - p201705: [2017-04-01, 2017-06-01) - 空洞范围变为:[2017-02-01, 2017-04-01) - ``` - - * 现在增加一个分区 p201702new VALUES LESS THAN ("2017-03-01"),分区结果如下: - - ``` - p201701: [MIN_VALUE, 2017-02-01) - p201702new: [2017-02-01, 2017-03-01) - p201705: [2017-04-01, 2017-06-01) - ``` - - > 可以看到空洞范围缩小为:[2017-03-01, 2017-04-01) - - * 现在删除分区 p201701,并添加分区 p201612 VALUES LESS THAN ("2017-01-01"),分区结果如下: - - ``` - p201612: [MIN_VALUE, 2017-01-01) - p201702new: [2017-02-01, 2017-03-01) - p201705: [2017-04-01, 2017-06-01) - ``` - - > 即出现了一个新的空洞:[2017-01-01, 2017-02-01) - - 综上,分区的删除不会改变已存在分区的范围。删除分区可能出现空洞。通过 `VALUES LESS THAN` 
语句增加分区时,分区的下界紧接上一个分区的上界。 - - 不可添加范围重叠的分区。 - - #### List 分区 - - * 分区列支持 `BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME, CHAR, VARCHAR` 数据类型,分区值为枚举值。只有当数据为目标分区枚举值其中之一时,才可以命中分区。 - * Partition 支持通过 `VALUES IN (...)` 来指定每个分区包含的枚举值。 - * 下面通过示例说明,进行分区的增删操作时,分区的变化。 - - * 如上 `example_list_tbl` 示例,当建表完成后,会自动生成如下3个分区: - - ``` - p_cn: ("Beijing", "Shanghai", "Hong Kong") - p_usa: ("New York", "San Francisco") - p_jp: ("Tokyo") - ``` - - * 当我们增加一个分区 p_uk VALUES IN ("London"),分区结果如下: - - ``` - p_cn: ("Beijing", "Shanghai", "Hong Kong") - p_usa: ("New York", "San Francisco") - p_jp: ("Tokyo") - p_uk: ("London") - ``` - - * 当我们删除分区 p_jp,分区结果如下: - - ``` - p_cn: ("Beijing", "Shanghai", "Hong Kong") - p_usa: ("New York", "San Francisco") - p_uk: ("London") - ``` - - 不可添加范围重叠的分区。 - -2. Bucket - - * 如果使用了 Partition,则 `DISTRIBUTED ...` 语句描述的是数据在**各个分区内**的划分规则。如果不使用 Partition,则描述的是对整个表的数据的划分规则。 - * 分桶列可以是多列,但必须为 Key 列。分桶列可以和 Partition 列相同或不同。 - * 分桶列的选择,是在 **查询吞吐** 和 **查询并发** 之间的一种权衡: - 1. 如果选择多个分桶列,则数据分布更均匀。如果一个查询条件不包含所有分桶列的等值条件,那么该查询会触发所有分桶同时扫描,这样查询的吞吐会增加,单个查询的延迟随之降低。这个方式适合大吞吐低并发的查询场景。 - 2. 如果仅选择一个或少数分桶列,则对应的点查询可以仅触发一个分桶扫描。此时,当多个点查询并发时,这些查询有较大的概率分别触发不同的分桶扫描,各个查询之间的IO影响较小(尤其当不同桶分布在不同磁盘上时),所以这种方式适合高并发的点查询场景。 - - * 分桶的数量理论上没有上限。 - -3. 关于 Partition 和 Bucket 的数量和数据量的建议。 - - * 一个表的 Tablet 总数量等于 (Partition num * Bucket num)。 - * 一个表的 Tablet 数量,在不考虑扩容的情况下,推荐略多于整个集群的磁盘数量。 - * 单个 Tablet 的数据量理论上没有上下界,但建议在 1G - 10G 的范围内。如果单个 Tablet 数据量过小,则数据的聚合效果不佳,且元数据管理压力大。如果数据量过大,则不利于副本的迁移、补齐,且会增加 Schema Change 或者 Rollup 操作失败重试的代价(这些操作失败重试的粒度是 Tablet)。 - * 当 Tablet 的数据量原则和数量原则冲突时,建议优先考虑数据量原则。 - * 在建表时,每个分区的 Bucket 数量统一指定。但是在动态增加分区时(`ADD PARTITION`),可以单独指定新分区的 Bucket 数量。可以利用这个功能方便的应对数据缩小或膨胀。 - * 一个 Partition 的 Bucket 数量一旦指定,不可更改。所以在确定 Bucket 数量时,需要预先考虑集群扩容的情况。比如当前只有 3 台 host,每台 host 有 1 块盘。如果 Bucket 的数量只设置为 3 或更小,那么后期即使再增加机器,也不能提高并发度。 - * 举一些例子:假设在有10台BE,每台BE一块磁盘的情况下。如果一个表总大小为 500MB,则可以考虑4-8个分片。5GB:8-16个。50GB:32个。500GB:建议分区,每个分区大小在 50GB 左右,每个分区16-32个分片。5TB:建议分区,每个分区大小在 50GB 左右,每个分区16-32个分片。 - - > 注:表的数据量可以通过 `show data` 命令查看,结果除以副本数,即表的数据量。 - -#### 多列分区 - -Doris 支持指定多列作为分区列,示例如下: - -##### Range 分区 - -``` - PARTITION BY RANGE(`date`, `id`) - ( - PARTITION `p201701_1000` VALUES LESS THAN ("2017-02-01", "1000"), - PARTITION `p201702_2000` VALUES LESS THAN ("2017-03-01", "2000"), - PARTITION `p201703_all` VALUES LESS THAN ("2017-04-01") - ) -``` - - 在以上示例中,我们指定 `date`(DATE 类型) 和 `id`(INT 类型) 作为分区列。以上示例最终得到的分区如下: - -``` - p201701_1000: [(MIN_VALUE, MIN_VALUE), ("2017-02-01", "1000") ) - p201702_2000: [("2017-02-01", "1000"), ("2017-03-01", "2000") ) - p201703_all: [("2017-03-01", "2000"), ("2017-04-01", MIN_VALUE)) -``` - -注意,最后一个分区用户缺省只指定了 `date` 列的分区值,所以 `id` 列的分区值会默认填充 `MIN_VALUE`。当用户插入数据时,分区列值会按照顺序依次比较,最终得到对应的分区。举例如下: - -``` - 数据 --> 分区 - 2017-01-01, 200 --> p201701_1000 - 2017-01-01, 2000 --> p201701_1000 - 2017-02-01, 100 --> p201701_1000 - 2017-02-01, 2000 --> p201702_2000 - 2017-02-15, 5000 --> p201702_2000 - 2017-03-01, 2000 --> p201703_all - 2017-03-10, 1 --> p201703_all - 2017-04-01, 1000 --> 无法导入 - 2017-05-01, 1000 --> 无法导入 -``` - -##### List 分区 - -``` - PARTITION BY LIST(`id`, `city`) - ( - PARTITION `p1_city` VALUES IN (("1", "Beijing"), ("1", "Shanghai")), - PARTITION `p2_city` VALUES IN (("2", "Beijing"), ("2", "Shanghai")), - PARTITION `p3_city` VALUES IN (("3", "Beijing"), ("3", "Shanghai")) - ) -``` - -在以上示例中,我们指定 `id`(INT 类型) 和 `city`(VARCHAR 类型) 作为分区列。以上示例最终得到的分区如下: - -``` - p1_city: [("1", "Beijing"), ("1", "Shanghai")] - p2_city: [("2", "Beijing"), ("2", "Shanghai")] - p3_city: [("3", "Beijing"), 
("3", "Shanghai")] -``` - -当用户插入数据时,分区列值会按照顺序依次比较,最终得到对应的分区。举例如下: - -``` - 数据 ---> 分区 - 1, Beijing ---> p1_city - 1, Shanghai ---> p1_city - 2, Shanghai ---> p2_city - 3, Beijing ---> p3_city - 1, Tianjin ---> 无法导入 - 4, Beijing ---> 无法导入 -``` - -### PROPERTIES - -在建表语句的最后 PROPERTIES 中,可以指定以下两个参数: - -1. replication_num - - * 每个 Tablet 的副本数量。默认为3,建议保持默认即可。在建表语句中,所有 Partition 中的 Tablet 副本数量统一指定。而在增加新分区时,可以单独指定新分区中 Tablet 的副本数量。 - * 副本数量可以在运行时修改。强烈建议保持奇数。 - * 最大副本数量取决于集群中独立 IP 的数量(注意不是 BE 数量)。Doris 中副本分布的原则是,不允许同一个 Tablet 的副本分布在同一台物理机上,而识别物理机即通过 IP。所以,即使在同一台物理机上部署了 3 个或更多 BE 实例,如果这些 BE 的 IP 相同,则依然只能设置副本数为 1。 - * 对于一些小,并且更新不频繁的维度表,可以考虑设置更多的副本数。这样在 Join 查询时,可以有更大的概率进行本地数据 Join。 - -2. storage_medium & storage\_cooldown\_time - - * BE 的数据存储目录可以显式的指定为 SSD 或者 HDD(通过 .SSD 或者 .HDD 后缀区分)。建表时,可以统一指定所有 Partition 初始存储的介质。注意,后缀作用是显式指定磁盘介质,而不会检查是否与实际介质类型相符。 - * 默认初始存储介质可通过fe的配置文件 `fe.conf` 中指定 `default_storage_medium=xxx`,如果没有指定,则默认为 HDD。如果指定为 SSD,则数据初始存放在 SSD 上。 - * 如果没有指定 storage\_cooldown\_time,则默认 30 天后,数据会从 SSD 自动迁移到 HDD 上。如果指定了 storage\_cooldown\_time,则在到达 storage_cooldown_time 时间后,数据才会迁移。 - * 注意,当指定 storage_medium 时,如果FE参数 `enable_strict_storage_medium_check` 为 `False` 该参数只是一个“尽力而为”的设置。即使集群内没有设置 SSD 存储介质,也不会报错,而是自动存储在可用的数据目录中。 - 同样,如果 SSD 介质不可访问、空间不足,都可能导致数据初始直接存储在其他可用介质上。而数据到期迁移到 HDD 时,如果 HDD 介质不可访问、空间不足,也可能迁移失败(但是会不断尝试)。 - 如果FE参数 `enable_strict_storage_medium_check` 为 `True` 则当集群内没有设置 SSD 存储介质时,会报错 `Failed to find enough host in all backends with storage medium is SSD`。 - -### ENGINE - -本示例中,ENGINE 的类型是 olap,即默认的 ENGINE 类型。在 Doris 中,只有这个 ENGINE 类型是由 Doris 负责数据管理和存储的。其他 ENGINE 类型,如 mysql、broker、es 等等,本质上只是对外部其他数据库或系统中的表的映射,以保证 Doris 可以读取这些数据。而 Doris 本身并不创建、管理和存储任何非 olap ENGINE 类型的表和数据。 - -### 其他 - - `IF NOT EXISTS` 表示如果没有创建过该表,则创建。注意这里只判断表名是否存在,而不会判断新建表结构是否与已存在的表结构相同。所以如果存在一个同名但不同构的表,该命令也会返回成功,但并不代表已经创建了新的表和新的结构。 - -## 常见问题 - -### 建表操作常见问题 - -1. 如果在较长的建表语句中出现语法错误,可能会出现语法错误提示不全的现象。这里罗列可能的语法错误供手动纠错: - - * 语法结构错误。请仔细阅读 `HELP CREATE TABLE;`,检查相关语法结构。 - * 保留字。当用户自定义名称遇到保留字时,需要用反引号 `` 引起来。建议所有自定义名称使用这个符号引起来。 - * 中文字符或全角字符。非 utf8 编码的中文字符,或隐藏的全角字符(空格,标点等)会导致语法错误。建议使用带有显示不可见字符的文本编辑器进行检查。 - -2. `Failed to create partition [xxx] . Timeout` - - Doris 建表是按照 Partition 粒度依次创建的。当一个 Partition 创建失败时,可能会报这个错误。即使不使用 Partition,当建表出现问题时,也会报 `Failed to create partition`,因为如前文所述,Doris 会为没有指定 Partition 的表创建一个不可更改的默认的 Partition。 - - 当遇到这个错误是,通常是 BE 在创建数据分片时遇到了问题。可以参照以下步骤排查: - - 1. 在 fe.log 中,查找对应时间点的 `Failed to create partition` 日志。在该日志中,会出现一系列类似 `{10001-10010}` 字样的数字对。数字对的第一个数字表示 Backend ID,第二个数字表示 Tablet ID。如上这个数字对,表示 ID 为 10001 的 Backend 上,创建 ID 为 10010 的 Tablet 失败了。 - 2. 前往对应 Backend 的 be.INFO 日志,查找对应时间段内,tablet id 相关的日志,可以找到错误信息。 - 3. 以下罗列一些常见的 tablet 创建失败错误,包括但不限于: - * BE 没有收到相关 task,此时无法在 be.INFO 中找到 tablet id 相关日志。或者 BE 创建成功,但汇报失败。以上问题,请参阅 [部署与升级文档] 检查 FE 和 BE 的连通性。 - * 预分配内存失败。可能是表中一行的字节长度超过了 100KB。 - * `Too many open files`。打开的文件句柄数超过了 Linux 系统限制。需修改 Linux 系统的句柄数限制。 - - 如果创建数据分片时超时,也可以通过在 fe.conf 中设置 `tablet_create_timeout_second=xxx` 以及 `max_create_table_timeout_second=xxx` 来延长超时时间。其中 `tablet_create_timeout_second` 默认是1秒, `max_create_table_timeout_second` 默认是60秒,总体的超时时间为min(tablet_create_timeout_second * replication_num, max_create_table_timeout_second); - -3. 
建表命令长时间不返回结果。 - - Doris 的建表命令是同步命令。该命令的超时时间目前设置的比较简单,即(tablet num * replication num)秒。如果创建较多的数据分片,并且其中有分片创建失败,则可能导致等待较长超时后,才会返回错误。 - - 正常情况下,建表语句会在几秒或十几秒内返回。如果超过一分钟,建议直接取消掉这个操作,前往 FE 或 BE 的日志查看相关错误。 diff --git a/docs/zh-CN/getting-started/hit-the-rollup.md b/docs/zh-CN/getting-started/hit-the-rollup.md deleted file mode 100644 index 92c0d65b00..0000000000 --- a/docs/zh-CN/getting-started/hit-the-rollup.md +++ /dev/null @@ -1,296 +0,0 @@ ---- -{ - "title": "Rollup 与查询", - "language": "zh-CN" -} ---- - - - -# Rollup 与查询 - -在 Doris 里 Rollup 作为一份聚合物化视图,其在查询中可以起到两个作用: - -* 索引 -* 聚合数据(仅用于聚合模型,即aggregate key) - -但是为了命中 Rollup 需要满足一定的条件,并且可以通过执行计划中 ScanNode 节点的 PreAggregation 的值来判断是否可以命中 Rollup,以及 Rollup 字段来判断命中的是哪一张 Rollup 表。 - -## 名词解释 - -Base:基表。 - -Rollup:一般指基于 Base 表创建的 Rollup 表,但在一些场景包括 Base 以及 Rollup 表。 - -## 索引 - -前面的查询实践中已经介绍过 Doris 的前缀索引,即 Doris 会把 Base/Rollup 表中的前 36 个字节(有 varchar 类型则可能导致前缀索引不满 36 个字节,varchar 会截断前缀索引,并且最多使用 varchar 的 20 个字节)在底层存储引擎单独生成一份排序的稀疏索引数据(数据也是排序的,用索引定位,然后在数据中做二分查找),然后在查询的时候会根据查询中的条件来匹配每个 Base/Rollup 的前缀索引,并且选择出匹配前缀索引最长的一个 Base/Rollup。 - -``` - -----> 从左到右匹配 -+----+----+----+----+----+----+ -| c1 | c2 | c3 | c4 | c5 |... | -``` - -如上图,取查询中 where 以及 on 上下推到 ScanNode 的条件,从前缀索引的第一列开始匹配,检查条件中是否有这些列,有则累计匹配的长度,直到匹配不上或者36字节结束(varchar类型的列只能匹配20个字节,并且会匹配不足36个字节截断前缀索引),然后选择出匹配长度最长的一个 Base/Rollup,下面举例说明,创建了一张Base表以及四张rollup: - -``` -+---------------+-------+--------------+------+-------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+---------------+-------+--------------+------+-------+---------+-------+ -| test | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup_index1 | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup_index2 | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k1 | TINYINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup_index3 | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| 
rollup_index4 | k4 | BIGINT | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -+---------------+-------+--------------+------+-------+---------+-------+ -``` - -这五张表的前缀索引分别为 - -``` -Base(k1 ,k2, k3, k4, k5, k6, k7) - -rollup_index1(k9) - -rollup_index2(k9) - -rollup_index3(k4, k5, k6, k1, k2, k3, k7) - -rollup_index4(k4, k6, k5, k1, k2, k3, k7) -``` - -能用的上前缀索引的列上的条件需要是 `=` `<` `>` `<=` `>=` `in` `between` 这些并且这些条件是并列的且关系使用 `and` 连接,对于`or`、`!=` 等这些不能命中,然后看以下查询: - - -`SELECT * FROM test WHERE k1 = 1 AND k2 > 3;` - - -有 k1 以及 k2 上的条件,检查只有 Base 的第一列含有条件里的 k1,所以匹配最长的前缀索引即 test,explain一下: - -``` -| 0:OlapScanNode -| TABLE: test -| PREAGGREGATION: OFF. Reason: No AggregateInfo -| PREDICATES: `k1` = 1, `k2` > 3 -| partitions=1/1 -| rollup: test -| buckets=1/10 -| cardinality=-1 -| avgRowSize=0.0 -| numNodes=0 -| tuple ids: 0 -``` - -再看以下查询: - -`SELECT * FROM test WHERE k4 = 1 AND k5 > 3;` - -有 k4 以及 k5 的条件,检查 rollup_index3、rollup_index4 的第一列含有 k4,但是 rollup_index3 的第二列含有k5,所以匹配的前缀索引最长。 - -``` -| 0:OlapScanNode -| TABLE: test -| PREAGGREGATION: OFF. Reason: No AggregateInfo -| PREDICATES: `k4` = 1, `k5` > 3 -| partitions=1/1 -| rollup: rollup_index3 -| buckets=10/10 -| cardinality=-1 -| avgRowSize=0.0 -| numNodes=0 -| tuple ids: 0 -``` - -现在我们尝试匹配含有 varchar 列上的条件,如下: - -`SELECT * FROM test WHERE k9 IN ("xxx", "yyyy") AND k1 = 10;` - -有 k9 以及 k1 两个条件,rollup_index1 以及 rollup_index2 的第一列都含有 k9,按理说这里选择这两个 rollup 都可以命中前缀索引并且效果是一样的随机选择一个即可(因为这里 varchar 刚好20个字节,前缀索引不足36个字节被截断),但是当前策略这里还会继续匹配 k1,因为 rollup_index1 的第二列为 k1,所以选择了 rollup_index1,其实后面的 k1 条件并不会起到加速的作用。(如果对于前缀索引外的条件需要其可以起到加速查询的目的,可以通过建立 Bloom Filter 过滤器加速。一般对于字符串类型建立即可,因为 Doris 针对列存在 Block 级别对于整形、日期已经有 Min/Max 索引) 以下是 explain 的结果。 - -``` -| 0:OlapScanNode -| TABLE: test -| PREAGGREGATION: OFF. Reason: No AggregateInfo -| PREDICATES: `k9` IN ('xxx', 'yyyy'), `k1` = 10 -| partitions=1/1 -| rollup: rollup_index1 -| buckets=1/10 -| cardinality=-1 -| avgRowSize=0.0 -| numNodes=0 -| tuple ids: 0 -``` - -最后看一个多张Rollup都可以命中的查询: - -`SELECT * FROM test WHERE k4 < 1000 AND k5 = 80 AND k6 >= 10000;` - -有 k4,k5,k6 三个条件,rollup_index3 以及 rollup_index4 的前3列分别含有这三列,所以两者匹配的前缀索引长度一致,选取两者都可以,当前默认的策略为选取了比较早创建的一张 rollup,这里为 rollup_index3。 - -``` -| 0:OlapScanNode -| TABLE: test -| PREAGGREGATION: OFF. Reason: No AggregateInfo -| PREDICATES: `k4` < 1000, `k5` = 80, `k6` >= 10000.0 -| partitions=1/1 -| rollup: rollup_index3 -| buckets=10/10 -| cardinality=-1 -| avgRowSize=0.0 -| numNodes=0 -| tuple ids: 0 -``` - -如果稍微修改上面的查询为: - -`SELECT * FROM test WHERE k4 < 1000 AND k5 = 80 OR k6 >= 10000;` - -则这里的查询不能命中前缀索引。(甚至 Doris 存储引擎内的任何 Min/Max,BloomFilter 索引都不能起作用) - -## 聚合数据 - -当然一般的聚合物化视图其聚合数据的功能是必不可少的,这类物化视图对于聚合类查询或报表类查询都有非常大的帮助,要命中聚合物化视图需要下面一些前提: - -1. 查询或者子查询中涉及的所有列都存在一张独立的 Rollup 中。 -2. 
如果查询或者子查询中有 Join,则 Join 的类型需要是 Inner join。 - -以下是可以命中Rollup的一些聚合查询的种类, - -| 列类型 查询类型 | Sum | Distinct/Count Distinct | Min | Max | APPROX_COUNT_DISTINCT | -|--------------|-------|-------------------------|-------|-------|-------| -| Key | false | true | true | true | true | -| Value(Sum) | true | false | false | false | false | -|Value(Replace)| false | false | false | false | false | -| Value(Min) | false | false | true | false | false | -| Value(Max) | false | false | false | true | false | - -如果符合上述条件,则针对聚合模型在判断命中 Rollup 的时候会有两个阶段: - -1. 首先通过条件匹配出命中前缀索引索引最长的 Rollup 表,见上述索引策略。 -2. 然后比较 Rollup 的行数,选择最小的一张 Rollup。 - -如下 Base 表以及 Rollup: - -``` -+-------------+-------+--------------+------+-------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+-------------+-------+--------------+------+-------+---------+-------+ -| test_rollup | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k6 | CHAR(5) | Yes | true | N/A | | -| | k7 | DATE | Yes | true | N/A | | -| | k8 | DATETIME | Yes | true | N/A | | -| | k9 | VARCHAR(20) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup2 | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -| | | | | | | | -| rollup1 | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | true | N/A | | -| | k3 | INT | Yes | true | N/A | | -| | k4 | BIGINT | Yes | true | N/A | | -| | k5 | DECIMAL(9,3) | Yes | true | N/A | | -| | k10 | DOUBLE | Yes | false | N/A | MAX | -| | k11 | FLOAT | Yes | false | N/A | SUM | -+-------------+-------+--------------+------+-------+---------+-------+ -``` - -看以下查询: - -`SELECT SUM(k11) FROM test_rollup WHERE k1 = 10 AND k2 > 200 AND k3 in (1,2,3);` - -首先判断查询是否可以命中聚合的 Rollup表,经过查上面的图是可以的,然后条件中含有 k1,k2,k3 三个条件,这三个条件 test_rollup、rollup1、rollup2 的前三列都含有,所以前缀索引长度一致,然后比较行数显然 rollup2 的聚合程度最高行数最少所以选取 rollup2。 - -``` -| 0:OlapScanNode | -| TABLE: test_rollup | -| PREAGGREGATION: ON | -| PREDICATES: `k1` = 10, `k2` > 200, `k3` IN (1, 2, 3) | -| partitions=1/1 | -| rollup: rollup2 | -| buckets=1/10 | -| cardinality=-1 | -| avgRowSize=0.0 | -| numNodes=0 | -| tuple ids: 0 | -``` diff --git a/new-docs/zh-CN/install/install-deploy.md b/docs/zh-CN/install/install-deploy.md similarity index 100% rename from new-docs/zh-CN/install/install-deploy.md rename to docs/zh-CN/install/install-deploy.md diff --git a/new-docs/zh-CN/install/source-install/compilation-arm.md b/docs/zh-CN/install/source-install/compilation-arm.md similarity index 100% rename from new-docs/zh-CN/install/source-install/compilation-arm.md rename to docs/zh-CN/install/source-install/compilation-arm.md diff --git a/new-docs/zh-CN/install/source-install/compilation-with-ldb-toolchain.md b/docs/zh-CN/install/source-install/compilation-with-ldb-toolchain.md similarity index 100% rename from new-docs/zh-CN/install/source-install/compilation-with-ldb-toolchain.md rename to docs/zh-CN/install/source-install/compilation-with-ldb-toolchain.md diff --git a/new-docs/zh-CN/install/source-install/compilation.md b/docs/zh-CN/install/source-install/compilation.md similarity index 100% rename from new-docs/zh-CN/install/source-install/compilation.md rename to 
docs/zh-CN/install/source-install/compilation.md diff --git a/docs/zh-CN/installing/compilation-arm.md b/docs/zh-CN/installing/compilation-arm.md deleted file mode 100644 index e0bcc57c5e..0000000000 --- a/docs/zh-CN/installing/compilation-arm.md +++ /dev/null @@ -1,256 +0,0 @@ ---- -{ - "title": "在ARM平台上编译", - "language": "zh-CN" -} ---- - - - -# ARM64 + KylinOS 编译运行 Doris - -本文档介绍如何在 ARM64 平台上编译 Doris。 - -注意,该文档仅作为指导性文档。在不同环境中编译可能出现其他错误。 - -## 软硬件环境 - -1. KylinOS 版本: - - ``` - $> cat /etc/.kyinfo - name=Kylin-Server - milestone=10-SP1-Release-Build04-20200711 - arch=arm64 - beta=False - time=2020-07-11 17:16:54 - dist_id=Kylin-Server-10-SP1-Release-Build04-20200711-arm64-2020-07-11 17:16:54 - ``` - -2. CPU型号 - - ``` - $> cat /proc/cpuinfo - model name : Phytium,FT-2000+/64 - ``` - -## 使用 ldb-toolchain 编译 - -该方法适用于 [commit 7f3564](https://github.com/apache/incubator-doris/commit/7f3564cca62de49c9f2ea67fcf735921dbebb4d1) 之后的 Doris 版本。 - -下载 [ldb\_toolchain\_gen.aarch64.sh](https://github.com/amosbird/ldb_toolchain_gen/releases/download/v0.9.1/ldb_toolchain_gen.aarch64.sh) - -之后的编译方式参阅 [使用 LDB toolchain 编译](./compilation-with-ldb-toolchain.md) - -注意其中 jdk 和 nodejs 都需要下载对应的 aarch64 版本: - -1. [Java8-aarch64](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/jdk-8u291-linux-aarch64.tar.gz) -2. [Node v12.13.0-aarch64](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/node-v16.3.0-linux-arm64.tar.xz) - -## ~~使用 GCC 10 编译(已废弃)~~ - -该方法仅适用于 [commit 68bab73](https://github.com/apache/incubator-doris/commit/68bab73c359e40bf485a663e9a6e6ee76d81d382) 之前的 Doris 源码。 - -### 编译工具安装(无网络) - -示例中,所有工具安装在在 `/home/doris/tools/installed/` 目录下。 - -所需安装包请先在有网络情况下获取。 - -#### 1. 安装gcc10 - -下载 gcc-10.1.0 - -``` -wget https://mirrors.tuna.tsinghua.edu.cn/gnu/gcc/gcc-10.1.0/gcc-10.1.0.tar.gz -``` - -解压后,在 `contrib/download_prerequisites` 查看依赖并下载: - -``` -http://gcc.gnu.org/pub/gcc/infrastructure/gmp-6.1.0.tar.bz2 -http://gcc.gnu.org/pub/gcc/infrastructure/mpfr-3.1.4.tar.bz2 -http://gcc.gnu.org/pub/gcc/infrastructure/mpc-1.0.3.tar.gz -http://gcc.gnu.org/pub/gcc/infrastructure/isl-0.18.tar.bz2 -``` - -解压这四个依赖,然后移动到 gcc-10.1.0 源码目录下,并重命名为 gmp、isl、mpc、mpfr。 - -下载并安装 automake-1.15(因为gcc10编译过程中会查找automake 1.15 版本) - -``` -https://ftp.gnu.org/gnu/automake/automake-1.15.tar.gz -tar xzf automake-1.15.tar.gz -./configure --prefix=/home/doris/tools/installed -make && make install -export PATH=/home/doris/tools/installed/bin:$PATH -``` - -编译GCC10: - -``` -cd gcc-10.1.0 -./configure --prefix=/home/doris/tools/installed -make -j && make install -``` - -编译时间较长。 - -#### 2. 安装其他编译组件 - -1. jdk-8u291-linux-aarch64.tar.gz - - `https://www.oracle.com/java/technologies/javase/javase-jdk8-downloads.html` - - 无需编译,开箱即用。 - -2. cmake-3.19.8-Linux-aarch64.tar.gz - - `https://cmake.org/download/` - - 无需编译,开箱即用 - -3. apache-maven-3.8.1-bin.tar.gz - - `https://maven.apache.org/download.cgi` - - 无需编译,开箱即用 - -4. nodejs 16.3.0 - - `https://nodejs.org/dist/v16.3.0/node-v16.3.0-linux-arm64.tar.xz` - - 无需编译,开箱即用 - -5. libtool-2.4.6.tar.gz - - 编译第三方组件用,虽然系统可能自带了libtool,但是libtool需要和automake在一起,这样不容易出问题。 - - ``` - https://ftp.gnu.org/gnu/libtool/libtool-2.4.6.tar.gz - cd libtool-2.4.6/ - ./configure --prefix=/home/doris/tools/installed - make -j && make install - ``` - -6. binutils-2.36.tar.xz(获取bdf.h) - - ``` - https://ftp.gnu.org/gnu/binutils/binutils-2.36.tar.bz2 - ./configure --prefix=/home/doris/tools/installed - make -j && make install - ``` - -7. 
libiberty(编译BE用) - - 这个库的源码就在 gcc-10.1.0 的源码包下 - ``` - cd gcc-10.1.0/libiberty/ - ./configure --prefix=/home/doris/tools/installed - make - ``` - - 编译后会产生 libiberty.a,后续移动到 Doris 的thirdparty 的 lib64 目录中即可。 - -#### 3. 编译第三方库 - -假设Doris源码在 `/home/doris/doris-src/` 下。 - -1. 手动下载所有第三方库并放在 thirdparty/src 目录下。 -2. 在Doris源码目录下新增 `custom_env.sh` 并添加如下内容 - - ``` - export DORIS_THIRDPARTY=/home/doris/doris-src/thirdparty/ - export JAVA_HOME=/home/doris/tools/jdk1.8.0_291/ - export DORIS_GCC_HOME=/home/doris/tools/installed/ - export PATCH_COMPILER_RT=true - ``` - - 注意替换对应的目录 - -3. 修改 build-thirdparty.sh 中的部分内容 - - 1. 关闭 `build_mysql` 和 `build_libhdfs3` - - mysql 不再需要。而 libhdfs3 暂不支持 arm 架构,所以在arm中运行Doris,暂不支持通过 libhdfs3 直接访问 hdfs,需要通过broker。 - - 2. 在 `build_curl` 中增加 configure 参数:`--without-libpsl`。如果不添加,则在最终编译Doris BE的链接阶段,可能报错:`undefined reference to ‘psl_is_cookie_domain_acceptable'` - -4. 执行 build-thirdparty.sh。这里仅列举可能出现的错误 - - * ` error: narrowing conversion of '-1' from 'int' to 'char' [-Wnarrowing]` - - 编译brpc 0.9.7 时会出现错误,解决方案,在 brpc 的 CMakeLists.txt 的 `CMAKE_CXX_FLAGS` 中添加 `-Wno-narrowing`。brpc master 代码中已经修复这个问题: - - `https://github.com/apache/incubator-brpc/issues/1091` - - * `libz.a(deflate.o): relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `z_errmsg' which may bind externally can not be used when making a shared object; recompile with -fPIC` - - 编译brpc 0.9.7 时会出现错误,还有 libcrypto 也会报类似错误。原因未知,似乎在 aarch64 下,brpc 需要链接动态的 zlib 和 crypto 库。但是我们在编译这两个第三方库时,都只编译的了 .a 静态文件。解决方案:重新编译zlib和 openssl 生成.so 动态库: - - 打开 `build-thirdparty.sh`,找到 `build_zlib` 函数,将: - - ``` - ./configure --prefix=$TP_INSTALL_DIR --static - 就改为 - ./configure --prefix=$TP_INSTALL_DIR - ``` - - 找到 `build_openssl`,将以下部分注释掉: - - ``` - #if [ -f $TP_INSTALL_DIR/lib64/libcrypto.so ]; then - # rm -rf $TP_INSTALL_DIR/lib64/libcrypto.so* - #fi - #if [ -f $TP_INSTALL_DIR/lib64/libssl.so ]; then - # rm -rf $TP_INSTALL_DIR/lib64/libssl.so* - #fi - ``` - - 然后来到 `build-thirdparty.sh`,注释掉其他 `build_xxx`,仅打开 `build_zlib` 和 `build_openssl`,以及 `build_brpc` 和之后的 `build_xxx`。然后重新执行 `build-thirdparty.sh`。 - - * 编译到某个阶段卡住不动。 - - 不确定原因。解决方案:重跑 `build-thirdparty.sh`。`build-thirdparty.sh` 是可以重复执行的。 - -#### 4. 编译Doris源码 - -先通过以下命令查看编译机器是否支持avx2指令集 - -``` -$ cat /proc/cpuinfo | grep avx2 -``` - -不支持则使用以下命令进行编译 - -``` -$ USE_AVX2=0 sh build.sh -``` - -如果支持,可不加 USE_AVX2=0 ,直接执行 `sh build.sh` 即可。 - -#### 5. 常见错误 - -1. 编译 Doris 时出现 `undefined reference to psl_free` - - libcurl 会调用 libpsl 的函数,但 libpsl 未连接,原因未知。解决方法(二选一): - - 1. 在 `thirdparty/build-thirdparty.sh` 中的 `build_curl` 方法中添加 `--without-libpsl` 后重新编译 libcurl,然后再重新编译 Doris。 - 2. `be/CMakeLists.txt` 中 603 行左右,`-pthread` 后添加 `-lpsl`,然后重新编译 Doris。 diff --git a/docs/zh-CN/installing/compilation-with-ldb-toolchain.md b/docs/zh-CN/installing/compilation-with-ldb-toolchain.md deleted file mode 100644 index a504fb7459..0000000000 --- a/docs/zh-CN/installing/compilation-with-ldb-toolchain.md +++ /dev/null @@ -1,127 +0,0 @@ ---- -{ - "title": "使用 LDB toolchain 编译", - "language": "zh-CN" -} ---- - - - -# 使用 LDB toolchain 编译 - -本文档主要介绍如何使用 LDB toolchain 编译 Doris。该方式目前作为 Docker 编译方式的补充,方便没有 Docker 环境的开发者和用户编译 Doris 源码。 - -> 您依然可以使用 Docker 开发镜像编译最新代码:`apache/incubator-doris:build-env-ldb-toolchain-latest` - -> 感谢 [Amos Bird](https://github.com/amosbird) 的贡献。 - -## 准备编译环境 - -该方式适用于绝大多数 Linux 发行版(CentOS,Ubuntu 等)。 - -1. 
下载 `ldb_toolchain_gen.sh` - - 可以从 [这里](https://github.com/amosbird/ldb_toolchain_gen/releases) 下载最新的 `ldb_toolchain_gen.sh`。该脚本用于生成 ldb toolchain。 - - > 更多信息,可访问 [https://github.com/amosbird/ldb_toolchain_gen](https://github.com/amosbird/ldb_toolchain_gen) - -2. 执行以下命令生成 ldb toolchain - - ``` - sh ldb_toolchain_gen.sh /path/to/ldb_toolchain/ - ``` - - 其中 `/path/to/ldb_toolchain/` 为安装 toolchain 目录。 - - 执行成功后,会在 `/path/to/ldb_toolchain/` 下生成如下目录结构: - - ``` - ├── bin - ├── include - ├── lib - ├── share - ├── test - └── usr - ``` - -3. 下载并安装其他编译组件 - - 1. [Java8](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/jdk-8u131-linux-x64.tar.gz) - 2. [Apache Maven 3.6.3](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/apache-maven-3.6.3-bin.tar.gz) - 3. [Node v12.13.0](https://doris-thirdparty-repo.bj.bcebos.com/thirdparty/node-v12.13.0-linux-x64.tar.gz) - - 对于不同的 Linux 发行版,可能默认包含的组件不同。因此可能需要安装一些额外的组件。下面以 centos6 为例,其他发行版类似: - - ``` - # install required system packages - sudo yum install -y byacc patch automake libtool make which file ncurses-devel gettext-devel unzip bzip2 zip util-linux wget git python2 - - # install autoconf-2.69 - wget http://ftp.gnu.org/gnu/autoconf/autoconf-2.69.tar.gz && \ - tar zxf autoconf-2.69.tar.gz && \ - cd autoconf-2.69 && \ - ./configure && \ - make && \ - make install - - # install bison-3.0.4 - wget http://ftp.gnu.org/gnu/bison/bison-3.0.4.tar.gz && \ - tar xzf bison-3.0.4.tar.gz && \ - cd bison-3.0.4 && \ - ./configure && \ - make && \ - make install - ``` - -4. 下载 Doris 源码 - - ``` - git clone https://github.com/apache/incubator-doris.git - ``` - - 下载完成后,进入到 doris 源码目录,创建 `custom_env.sh`,文件,并设置 PATH 环境变量,如: - - ``` - export JAVA_HOME=/path/to/java/ - export PATH=$JAVA_HOME/bin:$PATH - export PATH=/path/to/maven/bin:$PATH - export PATH=/path/to/node/bin:$PATH - export PATH=/path/to/ldb_toolchain/bin:$PATH - ``` - -## 编译 Doris - -进入 Doris 源码目录,执行: - -``` -$ cat /proc/cpuinfo | grep avx2 -``` - -查看编译机器是否支持avx2指令集 - -不支持则使用以下命令进行编译 - -``` -$ USE_AVX2=0 sh build.sh -``` - -若支持则直接执行 `sh build.sh` 即可 - -该脚本会先编译第三方库,之后再编译 Doris 组件(FE、BE)。编译产出在 `output/` 目录下。 diff --git a/docs/zh-CN/installing/compilation.md b/docs/zh-CN/installing/compilation.md deleted file mode 100644 index d177b3b334..0000000000 --- a/docs/zh-CN/installing/compilation.md +++ /dev/null @@ -1,261 +0,0 @@ ---- -{ - "title": "编译", - "language": "zh-CN" -} ---- - - - -# 编译 - -本文档主要介绍如何通过源码编译 Doris。 - -## 使用 Docker 开发镜像编译(推荐) - -### 使用现成的镜像 - -1. 
下载 Docker 镜像 - - `$ docker pull apache/incubator-doris:build-env-ldb-toolchain-latest` - - 检查镜像下载完成: - - ``` - $ docker images - REPOSITORY TAG IMAGE ID CREATED SIZE - apache/incubator-doris build-env-ldb-toolchain-latest 49f68cecbc1a 4 days ago 3.76GB - ``` - -> 注1:针对不同的 Doris 版本,需要下载对应的镜像版本。从 Apache Doris 0.15 版本起,后续镜像版本号将与 Doris 版本号统一。比如可以使用 `apache/incubator-doris:build-env-for-0.15.0 ` 来编译 0.15.0 版本。 -> -> 注2:`apache/incubator-doris:build-env-ldb-toolchain-latest` 用于编译最新主干版本代码,会随主干版本不断更新。可以查看 `docker/README.md` 中的更新时间。 - -| 镜像版本 | commit id | doris 版本 | -|---|---|---| -| apache/incubator-doris:build-env | before [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) | 0.8.x, 0.9.x | -| apache/incubator-doris:build-env-1.1 | [ff0dd0d](https://github.com/apache/incubator-doris/commit/ff0dd0d2daa588f18b6db56f947e813a56d8ec81) | 0.10.x, 0.11.x | -| apache/incubator-doris:build-env-1.2 | [4ef5a8c](https://github.com/apache/incubator-doris/commit/4ef5a8c8560351d7fff7ff8fd51c4c7a75e006a8) | 0.12.x - 0.14.0 | -| apache/incubator-doris:build-env-1.3.1 | [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f) | 0.14.x | -| apache/incubator-doris:build-env-for-0.15.0 | [a81f4da](https://github.com/apache/incubator-doris/commit/a81f4da4e461a54782a96433b746d07be89e6b54) or later | 0.15.0 | -| apache/incubator-doris:build-env-latest | before [0efef1b](https://github.com/apache/incubator-doris/commit/0efef1b332300887ee0473f9df9bdd9d7297d824) | | -| apache/incubator-doris:build-env-ldb-toolchain-latest | trunk | | - -**注意**: - -> 1. 编译镜像 [ChangeLog](https://github.com/apache/incubator-doris/blob/master/thirdparty/CHANGELOG.md)。 - -> 2. doris 0.14.0 版本仍然使用apache/incubator-doris:build-env-1.2 编译,0.14.x 版本的代码将使用apache/incubator-doris:build-env-1.3.1。 - -> 3. 从 build-env-1.3.1 的docker镜像起,同时包含了 OpenJDK 8 和 OpenJDK 11,并且默认使用 OpenJDK 11 编译。请确保编译使用的 JDK 版本和运行时使用的 JDK 版本一致,否则会导致非预期的运行错误。你可以使用在进入编译镜像的容器后,使用以下命令切换默认 JDK 版本: -> -> 切换到 JDK 8: -> -> ``` -> $ alternatives --set java java-1.8.0-openjdk.x86_64 -> $ alternatives --set javac java-1.8.0-openjdk.x86_64 -> $ export JAVA_HOME=/usr/lib/jvm/java-1.8.0 -> ``` -> -> 切换到 JDK 11: -> -> ``` -> $ alternatives --set java java-11-openjdk.x86_64 -> $ alternatives --set javac java-11-openjdk.x86_64 -> $ export JAVA_HOME=/usr/lib/jvm/java-11 -> ``` - -2. 运行镜像 - - `$ docker run -it apache/incubator-doris:build-env-ldb-toolchain-latest` - - 建议以挂载本地 Doris 源码目录的方式运行镜像,这样编译的产出二进制文件会存储在宿主机中,不会因为镜像退出而消失。 - - 同时,建议同时将镜像中 maven 的 `.m2` 目录挂载到宿主机目录,以防止每次启动镜像编译时,重复下载 maven 的依赖库。 - - ``` - $ docker run -it -v /your/local/.m2:/root/.m2 -v /your/local/incubator-doris-DORIS-x.x.x-release/:/root/incubator-doris-DORIS-x.x.x-release/ apache/incubator-doris:build-env-ldb-toolchain-latest - ``` - -3. 下载源码 - - 启动镜像后,你应该已经处于容器内。可以通过以下命令下载 Doris 源码(已挂载本地源码目录则不用): - - ``` - $ wget https://dist.apache.org/repos/dist/dev/incubator/doris/xxx.tar.gz - or - $ git clone https://github.com/apache/incubator-doris.git - ``` - -4. 
编译 Doris - - 先通过以下命令查看编译机器是否支持avx2指令集 - - ``` - $ cat /proc/cpuinfo | grep avx2 - ``` - - 不支持则使用以下命令进行编译 - - ``` - $ USE_AVX2=0 sh build.sh - ``` - - 如果支持,可不加 USE_AVX2=0 ,直接进行编译 - - ``` - $ sh build.sh - ``` - - >**注意:** - > - >如果你是第一次使用 `build-env-for-0.15.0` 或之后的版本,第一次编译的时候要使用如下命令: - > - > `sh build.sh --clean --be --fe --ui` - > - > 这是因为 build-env-for-0.15.0 版本镜像升级了 thrift(0.9 -> 0.13),需要通过 --clean 命令强制使用新版本的 thrift 生成代码文件,否则会出现不兼容的代码。 - - 编译完成后,产出文件在 `output/` 目录中。 - -### 自行编译开发环境镜像 - -你也可以自己创建一个 Doris 开发环境镜像,具体可参阅 `docker/README.md` 文件。 - - -## 直接编译(CentOS/Ubuntu) - -你可以在自己的 linux 环境中直接尝试编译 Doris。 - -1. 系统依赖 - 不同的版本依赖也不相同 - * 在 [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f) 之前版本依赖如下: - - `GCC 7.3+, Oracle JDK 1.8+, Python 2.7+, Apache Maven 3.5+, CMake 3.11+ Bison 3.0+` - - 如果使用Ubuntu 16.04 及以上系统 可以执行以下命令来安装依赖 - - `sudo apt-get install build-essential openjdk-8-jdk maven cmake byacc flex automake libtool-bin bison binutils-dev libiberty-dev zip unzip libncurses5-dev curl git ninja-build python autopoint pkg-config` - - 如果是CentOS 可以执行以下命令 - - `sudo yum groupinstall 'Development Tools' && sudo yum install maven cmake byacc flex automake libtool bison binutils-devel zip unzip ncurses-devel curl git wget python2 glibc-static libstdc++-static java-1.8.0-openjdk` - - * 在 [ad67dd3](https://github.com/apache/incubator-doris/commit/ad67dd34a04c1ca960cff38e5b335b30fc7d559f) 之后版本依赖如下: - - `GCC 10+, Oracle JDK 1.8+, Python 2.7+, Apache Maven 3.5+, CMake 3.19.2+ Bison 3.0+` - - 如果使用Ubuntu 16.04 及以上系统 可以执行以下命令来安装依赖 - ``` - sudo apt install build-essential openjdk-8-jdk maven cmake byacc flex automake libtool-bin bison binutils-dev libiberty-dev zip unzip libncurses5-dev curl git ninja-build python - sudo add-apt-repository ppa:ubuntu-toolchain-r/ppa - sudo apt update - sudo apt install gcc-10 g++-10 - sudo apt-get install autoconf automake libtool autopoint - ``` - - 如果是CentOS 可以执行以下命令 - ``` - sudo yum groupinstall 'Development Tools' && sudo yum install maven cmake byacc flex automake libtool bison binutils-devel zip unzip ncurses-devel curl git wget python2 glibc-static libstdc++-static java-1.8.0-openjdk - sudo yum install centos-release-scl - sudo yum install devtoolset-10 - scl enable devtoolset-10 bash - ``` - 如果当前仓库没有提供devtoolset-10 可以添加如下repo 使用oracle 提供 package - ``` - [ol7_software_collections] - name=Software Collection packages for Oracle Linux 7 ($basearch) - baseurl=http://yum.oracle.com/repo/OracleLinux/OL7/SoftwareCollections/$basearch/ - gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-oracle - gpgcheck=1 - enabled=1 - ``` - - 安装完成后,自行设置环境变量 `PATH`, `JAVA_HOME` 等。(可以通过`alternatives --list`命令找到jdk的安装目录) - 注意: Doris 0.14.0 的版本仍然使用gcc7 的依赖编译,之后的代码将使用gcc10 的依赖 - -2. 编译 Doris - - 与使用 Docker 开发镜像编译一样,编译之前先检查是否支持avx2指令 - - ``` - $ cat /proc/cpuinfo | grep avx2 - ``` - - 支持则使用下面命令进行编译 - - ``` - $ sh build.sh - ``` - - 如不支持需要加 USE_AVX2=0 - - ``` - $ USE_AVX2=0 sh build.sh - ``` - - 编译完成后,产出文件在 `output/` 目录中。 - -## 常见问题 - -1. `Could not transfer artifact net.sourceforge.czt.dev:cup-maven-plugin:pom:1.6-cdh from/to xxx` - - 如遇到上述错误,请参照 [PR #4769](https://github.com/apache/incubator-doris/pull/4769/files) 修改 `fe/pom.xml` 中 cloudera 相关的仓库配置。 - -2. 第三方依赖下载连接错误、失效等问题 - - Doris 所依赖的第三方库的下载连接都在 `thirdparty/vars.sh` 文件内。随着时间推移,一些下载连接可能会失效。如果遇到这种情况。可以使用如下两种方式解决: - - 1. 手动修改 `thirdparty/vars.sh` 文件 - - 手动修改有问题的下载连接和对应的 MD5 值。 - - 2. 
使用第三方下载仓库: - - ``` - export REPOSITORY_URL=https://doris-thirdparty-repo.bj.bcebos.com/thirdparty - sh build-thirdparty.sh - ``` - - REPOSITORY_URL 中包含所有第三方库源码包和他们的历史版本。 - -3. `fatal error: Killed signal terminated program ...` - - 使用 Docker 镜像编译时如遇到上述报错,可能是分配给镜像的内存不足(Docker 默认分配的内存大小为 2GB,编译过程中内存占用的峰值大于 2GB)。 - - 尝试适当调大镜像的分配内存,推荐 4GB ~ 8GB。 - -## 特别声明 - -自 0.13 版本开始,默认的编译产出中将取消对 [1] 和 [2] 两个第三方库的依赖。这两个第三方库为 [GNU General Public License V3](https://www.gnu.org/licenses/gpl-3.0.en.html) 协议。该协议与 [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0) 协议不兼容,因此默认不出现在 Apache 发布版本中。 - -移除依赖库 [1] 会导致无法访问 MySQL 外部表。访问 MySQL 外部表的功能会在后续版本中通过 UnixODBC 实现。 - -移除依赖库 [2] 会导致在无法读取部分早期版本(0.8版本之前)写入的部分数据。因为早期版本中的数据是使用 LZO 算法压缩的,在之后的版本中,已经更改为 LZ4 压缩算法。后续我们会提供工具用于检测和转换这部分数据。 - -如果有需求,用户可以继续使用这两个依赖库。如需使用,需要在编译时添加如下选项: - -``` -WITH_MYSQL=1 WITH_LZO=1 sh build.sh -``` - -注意,当用户依赖这两个第三方库时,则默认不在 Apache License 2.0 协议框架下使用 Doris。请注意 GPL 相关协议约束。 - -* [1] mysql-5.7.18 -* [2] lzo-2.10 diff --git a/docs/zh-CN/installing/install-deploy.md b/docs/zh-CN/installing/install-deploy.md deleted file mode 100644 index 43db41e34b..0000000000 --- a/docs/zh-CN/installing/install-deploy.md +++ /dev/null @@ -1,475 +0,0 @@ ---- -{ - "title": "安装与部署", - "language": "zh-CN" -} ---- - - - -# 安装与部署 - -该文档主要介绍了部署 Doris 所需软硬件环境、建议的部署方式、集群扩容缩容,以及集群搭建到运行过程中的常见问题。 -在阅读本文档前,请先根据编译文档编译 Doris。 - -## 软硬件需求 - -### 概述 - -Doris 作为一款开源的 MPP 架构 OLAP 数据库,能够运行在绝大多数主流的商用服务器上。为了能够充分运用 MPP 架构的并发优势,以及 Doris 的高可用特性,我们建议 Doris 的部署遵循以下需求: - -#### Linux 操作系统版本需求 - -| Linux 系统 | 版本 | -|---|---| -| CentOS | 7.1 及以上 | -| Ubuntu | 16.04 及以上 | - -#### 软件需求 - -| 软件 | 版本 | -|---|---| -| Java | 1.8 及以上 | -| GCC | 4.8.2 及以上 | - -#### 操作系统安装要求 - -##### 设置系统最大打开文件句柄数 - -``` -vi /etc/security/limits.conf -* soft nofile 65536 -* hard nofile 65536 -``` - -##### 时钟同步 - -Doris 的元数据要求时间精度要小于5000ms,所以所有集群所有机器要进行时钟同步,避免因为时钟问题引发的元数据不一致导致服务出现异常。 - -##### 关闭交换分区(swap) - -Linux交换分区会给Doris带来很严重的性能问题,需要在安装之前禁用交换分区 - -##### Liunx文件系统 - -这里我们推荐使用ext4文件系统,在安装操作系统的时候,请选择ext4文件系统。 - -#### 开发测试环境 - -| 模块 | CPU | 内存 | 磁盘 | 网络 | 实例数量 | -|---|---|---|---|---|---| -| Frontend | 8核+ | 8GB+ | SSD 或 SATA,10GB+ * | 千兆网卡 | 1 | -| Backend | 8核+ | 16GB+ | SSD 或 SATA,50GB+ * | 千兆网卡 | 1-3 * | - -#### 生产环境 - -| 模块 | CPU | 内存 | 磁盘 | 网络 | 实例数量(最低要求) | -|---|---|---|---|---|---| -| Frontend | 16核+ | 64GB+ | SSD 或 RAID 卡,100GB+ * | 万兆网卡 | 1-5 * | -| Backend | 16核+ | 64GB+ | SSD 或 SATA,100G+ * | 万兆网卡 | 10-100 * | - -> 注1: -> 1. FE 的磁盘空间主要用于存储元数据,包括日志和 image。通常从几百 MB 到几个 GB 不等。 -> 2. BE 的磁盘空间主要用于存放用户数据,总磁盘空间按用户总数据量 * 3(3副本)计算,然后再预留额外 40% 的空间用作后台 compaction 以及一些中间数据的存放。 -> 3. 一台机器上可以部署多个 BE 实例,但是**只能部署一个 FE**。如果需要 3 副本数据,那么至少需要 3 台机器各部署一个 BE 实例(而不是1台机器部署3个BE实例)。**多个FE所在服务器的时钟必须保持一致(允许最多5秒的时钟偏差)** -> 4. 测试环境也可以仅适用一个 BE 进行测试。实际生产环境,BE 实例数量直接决定了整体查询延迟。 -> 5. 所有部署节点关闭 Swap。 - -> 注2:FE 节点的数量 -> 1. FE 角色分为 Follower 和 Observer,(Leader 为 Follower 组中选举出来的一种角色,以下统称 Follower,具体含义见 [元数据设计文档](../internal/metadata-design))。 -> 2. FE 节点数据至少为1(1 个 Follower)。当部署 1 个 Follower 和 1 个 Observer 时,可以实现读高可用。当部署 3 个 Follower 时,可以实现读写高可用(HA)。 -> 3. Follower 的数量**必须**为奇数,Observer 数量随意。 -> 4. 
根据以往经验,当集群可用性要求很高时(比如提供在线业务),可以部署 3 个 Follower 和 1-3 个 Observer。如果是离线业务,建议部署 1 个 Follower 和 1-3 个 Observer。 - -* **通常我们建议 10 ~ 100 台左右的机器,来充分发挥 Doris 的性能(其中 3 台部署 FE(HA),剩余的部署 BE)** -* **当然,Doris的性能与节点数量及配置正相关。在最少4台机器(一台 FE,三台 BE,其中一台 BE 混部一个 Observer FE 提供元数据备份),以及较低配置的情况下,依然可以平稳的运行 Doris。** -* **如果 FE 和 BE 混部,需注意资源竞争问题,并保证元数据目录和数据目录分属不同磁盘。** - -#### Broker 部署 - -Broker 是用于访问外部数据源(如 hdfs)的进程。通常,在每台机器上部署一个 broker 实例即可。 - -#### 网络需求 - -Doris 各个实例直接通过网络进行通讯。以下表格展示了所有需要的端口 - -| 实例名称 | 端口名称 | 默认端口 | 通讯方向 | 说明 | -|---|---|---|---| ---| -| BE | be_port | 9060 | FE --> BE | BE 上 thrift server 的端口,用于接收来自 FE 的请求 | -| BE | webserver_port | 8040 | BE <--> BE | BE 上的 http server 的端口 | -| BE | heartbeat\_service_port | 9050 | FE --> BE | BE 上心跳服务端口(thrift),用于接收来自 FE 的心跳 | -| BE | brpc\_port | 8060 | FE <--> BE, BE <--> BE | BE 上的 brpc 端口,用于 BE 之间通讯 | -| FE | http_port | 8030 | FE <--> FE,用户 <--> FE |FE 上的 http server 端口 | -| FE | rpc_port | 9020 | BE --> FE, FE <--> FE | FE 上的 thrift server 端口,每个fe的配置需要保持一致| -| FE | query_port | 9030 | 用户 <--> FE | FE 上的 mysql server 端口 | -| FE | edit\_log_port | 9010 | FE <--> FE | FE 上的 bdbje 之间通信用的端口 | -| Broker | broker\_ipc_port | 8000 | FE --> Broker, BE --> Broker | Broker 上的 thrift server,用于接收请求 | - -> 注: -> 1. 当部署多个 FE 实例时,要保证 FE 的 http\_port 配置相同。 -> 2. 部署前请确保各个端口在应有方向上的访问权限。 - -#### IP 绑定 - -因为有多网卡的存在,或因为安装过 docker 等环境导致的虚拟网卡的存在,同一个主机可能存在多个不同的 ip。当前 Doris 并不能自动识别可用 IP。所以当遇到部署主机上有多个 IP 时,必须通过 priority\_networks 配置项来强制指定正确的 IP。 - -priority\_networks 是 FE 和 BE 都有的一个配置,配置项需写在 fe.conf 和 be.conf 中。该配置项用于在 FE 或 BE 启动时,告诉进程应该绑定哪个IP。示例如下: - -`priority_networks=10.1.3.0/24` - -这是一种 [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) 的表示方法。FE 或 BE 会根据这个配置项来寻找匹配的IP,作为自己的 localIP。 - -**注意**:当配置完 priority\_networks 并启动 FE 或 BE 后,只是保证了 FE 或 BE 自身的 IP 进行了正确的绑定。而在使用 ADD BACKEND 或 ADD FRONTEND 语句中,也需要指定和 priority\_networks 配置匹配的 IP,否则集群无法建立。举例: - -BE 的配置为:`priority_networks=10.1.3.0/24` - -但是在 ADD BACKEND 时使用的是:`ALTER SYSTEM ADD BACKEND "192.168.0.1:9050";` - -则 FE 和 BE 将无法正常通信。 - -这时,必须 DROP 掉这个添加错误的 BE,重新使用正确的 IP 执行 ADD BACKEND。 - -FE 同理。 - -BROKER 当前没有,也不需要 priority\_networks 这个选项。Broker 的服务默认绑定在 0.0.0.0 上。只需在 ADD BROKER 时,执行正确可访问的 BROKER IP 即可。 - -#### 表名大小写敏感性设置 - -doris默认为表名大小写敏感,如有表名大小写不敏感的需求需在集群初始化时进行设置。表名大小写敏感性在集群初始化完成后不可再修改。 - -详细参见 [变量](../administrator-guide/variables.md##支持的变量) 中关于`lower_case_table_names`变量的介绍。 - -## 集群部署 - -### 手动部署 - -#### FE 部署 - -* 拷贝 FE 部署文件到指定节点 - - 将源码编译生成的 output 下的 fe 文件夹拷贝到 FE 的节点指定部署路径下并进入该目录。 - -* 配置 FE - - 1. 配置文件为 conf/fe.conf。其中注意:`meta_dir`是元数据存放位置。默认值为 `${DORIS_HOME}/doris-meta`。需**手动创建**该目录。 - - **注意:生产环境强烈建议单独指定目录不要放在Doris安装目录下,最好是单独的磁盘(如果有SSD最好),测试开发环境可以使用默认配置** - - 2. 
fe.conf 中 JAVA_OPTS 默认 java 最大堆内存为 4GB,**建议生产环境调整至 8G 以上**。 - -* 启动FE - - `bin/start_fe.sh --daemon` - - FE进程启动进入后台执行。日志默认存放在 log/ 目录下。如启动失败,可以通过查看 log/fe.log 或者 log/fe.out 查看错误信息。 - -* 如需部署多 FE,请参见 "FE 扩容和缩容" 章节 - -#### BE 部署 - -* 拷贝 BE 部署文件到所有要部署 BE 的节点 - - 将源码编译生成的 output 下的 be 文件夹拷贝到 BE 的节点的指定部署路径下。 - - > 注意:`output/be/lib/debug_info/` 目录下为调试信息文件,文件较大,但实际运行不需要这些文件,可以不部署。 - -* 修改所有 BE 的配置 - - 修改 be/conf/be.conf。主要是配置 `storage_root_path`:数据存放目录。默认在be/storage下,需要**手动创建**该目录。多个路径之间使用英文状态的分号 `;` 分隔(**最后一个目录后不要加 `;`**)。可以通过路径区别存储目录的介质,HDD或SSD。可以添加容量限制在每个路径的末尾,通过英文状态逗号`,`隔开。 - - 示例1如下: - - **注意:如果是SSD磁盘要在目录后面加上`.SSD`,HDD磁盘在目录后面加`.HDD`** - - `storage_root_path=/home/disk1/doris.HDD,50;/home/disk2/doris.SSD,10;/home/disk2/doris` - - **说明** - - - /home/disk1/doris.HDD,50,表示存储限制为50GB,HDD; - - /home/disk2/doris.SSD,10,存储限制为10GB,SSD; - - /home/disk2/doris,存储限制为磁盘最大容量,默认为HDD - - 示例2如下: - - **注意:不论HDD磁盘目录还是SSD磁盘目录,都无需添加后缀,storage_root_path参数里指定medium即可** - - `storage_root_path=/home/disk1/doris,medium:hdd,capacity:50;/home/disk2/doris,medium:ssd,capacity:50` - - **说明** - - - /home/disk1/doris,medium:hdd,capacity:10,表示存储限制为10GB, HDD; - - /home/disk2/doris,medium:ssd,capacity:50,表示存储限制为50GB, SSD; - -* BE webserver_port端口配置 - - 如果 be 部署在 hadoop 集群中,注意调整 be.conf 中的 `webserver_port = 8040` ,以免造成端口冲突 - -* 在 FE 中添加所有 BE 节点 - - BE 节点需要先在 FE 中添加,才可加入集群。可以使用 mysql-client([下载MySQL 5.7](https://dev.mysql.com/downloads/mysql/5.7.html)) 连接到 FE: - - `./mysql-client -h fe_host -P query_port -uroot` - - 其中 fe_host 为 FE 所在节点 ip;query_port 在 fe/conf/fe.conf 中的;默认使用 root 账户,无密码登录。 - - 登录后,执行以下命令来添加每一个 BE: - - `ALTER SYSTEM ADD BACKEND "be_host:heartbeat-service_port";` - - 其中 be_host 为 BE 所在节点 ip;heartbeat_service_port 在 be/conf/be.conf 中。 - -* 启动 BE - - `bin/start_be.sh --daemon` - - BE 进程将启动并进入后台执行。日志默认存放在 be/log/ 目录下。如启动失败,可以通过查看 be/log/be.log 或者 be/log/be.out 查看错误信息。 - -* 查看BE状态 - - 使用 mysql-client 连接到 FE,并执行 `SHOW PROC '/backends';` 查看 BE 运行情况。如一切正常,`Alive` 列应为 `true`。 - -#### (可选)FS_Broker 部署 - -Broker 以插件的形式,独立于 Doris 部署。如果需要从第三方存储系统导入数据,需要部署相应的 Broker,默认提供了读取 HDFS 、百度云 BOS 及 Amazon S3 的 fs_broker。fs_broker 是无状态的,建议每一个 FE 和 BE 节点都部署一个 Broker。 - -* 拷贝源码 fs_broker 的 output 目录下的相应 Broker 目录到需要部署的所有节点上。建议和 BE 或者 FE 目录保持同级。 - -* 修改相应 Broker 配置 - - 在相应 broker/conf/ 目录下对应的配置文件中,可以修改相应配置。 - - * 启动 Broker - - `bin/start_broker.sh --daemon` - -* 添加 Broker - - 要让 Doris 的 FE 和 BE 知道 Broker 在哪些节点上,通过 sql 命令添加 Broker 节点列表。 - - 使用 mysql-client 连接启动的 FE,执行以下命令: - - `ALTER SYSTEM ADD BROKER broker_name "broker_host1:broker_ipc_port1","broker_host2:broker_ipc_port2",...;` - - 其中 broker_host 为 Broker 所在节点 ip;broker_ipc_port 在 Broker 配置文件中的conf/apache_hdfs_broker.conf。 - -* 查看 Broker 状态 - - 使用 mysql-client 连接任一已启动的 FE,执行以下命令查看 Broker 状态:`SHOW PROC "/brokers";` - -**注:在生产环境中,所有实例都应使用守护进程启动,以保证进程退出后,会被自动拉起,如 [Supervisor](http://supervisord.org/)。如需使用守护进程启动,在 0.9.0 及之前版本中,需要修改各个 start_xx.sh 脚本,去掉最后的 & 符号**。从 0.10.0 版本开始,直接调用 `sh start_xx.sh` 启动即可。也可参考 [这里](https://www.cnblogs.com/lenmom/p/9973401.html) - -## 扩容缩容 - -Doris 可以很方便的扩容和缩容 FE、BE、Broker 实例。 - -### FE 扩容和缩容 - -可以通过将 FE 扩容至 3 个以上节点来实现 FE 的高可用。 - -用户可以通过 mysql 客户端登陆 Master FE。通过: - -`SHOW PROC '/frontends';` - -来查看当前 FE 的节点情况。 - -也可以通过前端页面连接:```http://fe_hostname:fe_http_port/frontend``` 或者 ```http://fe_hostname:fe_http_port/system?path=//frontends``` 来查看 FE 节点的情况。 - -以上方式,都需要 Doris 的 root 用户权限。 - -FE 节点的扩容和缩容过程,不影响当前系统运行。 - -#### 增加 FE 节点 - -FE 分为 Leader,Follower 和 Observer 三种角色。 默认一个集群,只能有一个 Leader,可以有多个 Follower 和 Observer。其中 Leader 和 Follower 组成一个 Paxos 选择组,如果 Leader 宕机,则剩下的 
Follower 会自动选出新的 Leader,保证写入高可用。Observer 同步 Leader 的数据,但是不参加选举。如果只部署一个 FE,则 FE 默认就是 Leader。 - -第一个启动的 FE 自动成为 Leader。在此基础上,可以添加若干 Follower 和 Observer。 - -添加 Follower 或 Observer。使用 mysql-client 连接到已启动的 FE,并执行: - -`ALTER SYSTEM ADD FOLLOWER "follower_host:edit_log_port";` - -或 - -`ALTER SYSTEM ADD OBSERVER "observer_host:edit_log_port";` - -其中 follower\_host和observer\_host 为 Follower 或 Observer 所在节点 ip,edit\_log\_port 在其配置文件 fe.conf 中。 - -配置及启动 Follower 或 Observer。Follower 和 Observer 的配置同 Leader 的配置。第一次启动时,需执行以下命令: - -`./bin/start_fe.sh --helper leader_fe_host:edit_log_port --daemon` - -其中 leader\_fe\_host 为 Leader 所在节点 ip, edit\_log\_port 在 Leader 的配置文件 fe.conf 中。--helper 参数仅在 follower 和 observer 第一次启动时才需要。 - -查看 Follower 或 Observer 运行状态。使用 mysql-client 连接到任一已启动的 FE,并执行:SHOW PROC '/frontends'; 可以查看当前已加入集群的 FE 及其对应角色。 - -> FE 扩容注意事项: -> 1. Follower FE(包括 Leader)的数量必须为奇数,建议最多部署 3 个组成高可用(HA)模式即可。 -> 2. 当 FE 处于高可用部署时(1个 Leader,2个 Follower),我们建议通过增加 Observer FE 来扩展 FE 的读服务能力。当然也可以继续增加 Follower FE,但几乎是不必要的。 -> 3. 通常一个 FE 节点可以应对 10-20 台 BE 节点。建议总的 FE 节点数量在 10 个以下。而通常 3 个即可满足绝大部分需求。 -> 4. helper 不能指向 FE 自身,必须指向一个或多个已存在并且正常运行中的 Master/Follower FE。 - -#### 删除 FE 节点 - -使用以下命令删除对应的 FE 节点: - -```ALTER SYSTEM DROP FOLLOWER[OBSERVER] "fe_host:edit_log_port";``` - -> FE 缩容注意事项: -> 1. 删除 Follower FE 时,确保最终剩余的 Follower(包括 Leader)节点为奇数。 - -### BE 扩容和缩容 - -用户可以通过 mysql-client 登陆 Leader FE。通过: - -```SHOW PROC '/backends';``` - -来查看当前 BE 的节点情况。 - -也可以通过前端页面连接:```http://fe_hostname:fe_http_port/backend``` 或者 ```http://fe_hostname:fe_http_port/system?path=//backends``` 来查看 BE 节点的情况。 - -以上方式,都需要 Doris 的 root 用户权限。 - -BE 节点的扩容和缩容过程,不影响当前系统运行以及正在执行的任务,并且不会影响当前系统的性能。数据均衡会自动进行。根据集群现有数据量的大小,集群会在几个小时到1天不等的时间内,恢复到负载均衡的状态。集群负载情况,可以参见 [Tablet 负载均衡文档](../administrator-guide/operation/tablet-repair-and-balance.md)。 - -#### 增加 BE 节点 - -BE 节点的增加方式同 **BE 部署** 一节中的方式,通过 `ALTER SYSTEM ADD BACKEND` 命令增加 BE 节点。 - -> BE 扩容注意事项: -> 1. BE 扩容后,Doris 会自动根据负载情况,进行数据均衡,期间不影响使用。 - -#### 删除 BE 节点 - -删除 BE 节点有两种方式:DROP 和 DECOMMISSION - -DROP 语句如下: - -```ALTER SYSTEM DROP BACKEND "be_host:be_heartbeat_service_port";``` - -**注意:DROP BACKEND 会直接删除该 BE,并且其上的数据将不能再恢复!!!所以我们强烈不推荐使用 DROP BACKEND 这种方式删除 BE 节点。当你使用这个语句时,会有对应的防误操作提示。** - -DECOMMISSION 语句如下: - -```ALTER SYSTEM DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";``` - -> DECOMMISSION 命令说明: -> 1. 该命令用于安全删除 BE 节点。命令下发后,Doris 会尝试将该 BE 上的数据向其他 BE 节点迁移,当所有数据都迁移完成后,Doris 会自动删除该节点。 -> 2. 该命令是一个异步操作。执行后,可以通过 ```SHOW PROC '/backends';``` 看到该 BE 节点的 isDecommission 状态为 true。表示该节点正在进行下线。 -> 3. 该命令**不一定执行成功**。比如剩余 BE 存储空间不足以容纳下线 BE 上的数据,或者剩余机器数量不满足最小副本数时,该命令都无法完成,并且 BE 会一直处于 isDecommission 为 true 的状态。 -> 4. DECOMMISSION 的进度,可以通过 ```SHOW PROC '/backends';``` 中的 TabletNum 查看,如果正在进行,TabletNum 将不断减少。 -> 5. 该操作可以通过: -> ```CANCEL DECOMMISSION BACKEND "be_host:be_heartbeat_service_port";``` -> 命令取消。取消后,该 BE 上的数据将维持当前剩余的数据量。后续 Doris 重新进行负载均衡 - -**对于多租户部署环境下,BE 节点的扩容和缩容,请参阅 [多租户设计文档](../administrator-guide/operation/multi-tenant.md)。** - -### Broker 扩容缩容 - -Broker 实例的数量没有硬性要求。通常每台物理机部署一个即可。Broker 的添加和删除可以通过以下命令完成: - -```ALTER SYSTEM ADD BROKER broker_name "broker_host:broker_ipc_port";``` -```ALTER SYSTEM DROP BROKER broker_name "broker_host:broker_ipc_port";``` -```ALTER SYSTEM DROP ALL BROKER broker_name;``` - -Broker 是无状态的进程,可以随意启停。当然,停止后,正在其上运行的作业会失败,重试即可。 - -## 常见问题 - -### 进程相关 - -1. 
如何确定 FE 进程启动成功 - - FE 进程启动后,会首先加载元数据,根据 FE 角色的不同,在日志中会看到 ```transfer from UNKNOWN to MASTER/FOLLOWER/OBSERVER```。最终会看到 ```thrift server started``` 日志,并且可以通过 mysql 客户端连接到 FE,则表示 FE 启动成功。 - - 也可以通过如下连接查看是否启动成功: - `http://fe_host:fe_http_port/api/bootstrap` - - 如果返回: - `{"status":"OK","msg":"Success"}` - - 则表示启动成功,其余情况,则可能存在问题。 - - > 注:如果在 fe.log 中查看不到启动失败的信息,也许在 fe.out 中可以看到。 - -2. 如何确定 BE 进程启动成功 - - BE 进程启动后,如果之前有数据,则可能有数分钟不等的数据索引加载时间。 - - 如果是 BE 的第一次启动,或者该 BE 尚未加入任何集群,则 BE 日志会定期滚动 ```waiting to receive first heartbeat from frontend``` 字样。表示 BE 还未通过 FE 的心跳收到 Master 的地址,正在被动等待。这种错误日志,在 FE 中 ADD BACKEND 并发送心跳后,就会消失。如果在接到心跳后,又重复出现 ``````master client, get client from cache failed.host: , port: 0, code: 7`````` 字样,说明 FE 成功连接了 BE,但 BE 无法主动连接 FE。可能需要检查 BE 到 FE 的 rpc_port 的连通性。 - - 如果 BE 已经被加入集群,日志中应该每隔 5 秒滚动来自 FE 的心跳日志:```get heartbeat, host: xx.xx.xx.xx, port: 9020, cluster id: xxxxxx```,表示心跳正常。 - - 其次,日志中应该每隔 10 秒滚动 ```finish report task success. return code: 0``` 的字样,表示 BE 向 FE 的通信正常。 - - 同时,如果有数据查询,应该能看到不停滚动的日志,并且有 ```execute time is xxx``` 日志,表示 BE 启动成功,并且查询正常。 - - 也可以通过如下连接查看是否启动成功: - `http://be_host:be_http_port/api/health` - - 如果返回: - `{"status": "OK","msg": "To Be Added"}` - - 则表示启动成功,其余情况,则可能存在问题。 - - > 注:如果在 be.INFO 中查看不到启动失败的信息,也许在 be.out 中可以看到。 - -3. 搭建系统后,如何确定 FE、BE 连通性正常 - - 首先确认 FE 和 BE 进程都已经单独正常启动,并确认已经通过 `ADD BACKEND` 或者 `ADD FOLLOWER/OBSERVER` 语句添加了所有节点。 - - 如果心跳正常,BE 的日志中会显示 ```get heartbeat, host: xx.xx.xx.xx, port: 9020, cluster id: xxxxxx```。如果心跳失败,在 FE 的日志中会出现 ```backend[10001] got Exception: org.apache.thrift.transport.TTransportException``` 类似的字样,或者其他 thrift 通信异常日志,表示 FE 向 10001 这个 BE 的心跳失败。这里需要检查 FE 向 BE host 的心跳端口的连通性。 - - 如果 BE 向 FE 的通信正常,则 BE 日志中会显示 ```finish report task success. return code: 0``` 的字样。否则会出现 ```master client, get client from cache failed``` 的字样。这种情况下,需要检查 BE 向 FE 的 rpc_port 的连通性。 - -4. Doris 各节点认证机制 - - 除了 Master FE 以外,其余角色节点(Follower FE,Observer FE,Backend),都需要通过 `ALTER SYSTEM ADD` 语句先注册到集群,然后才能加入集群。 - - Master FE 在第一次启动时,会在 doris-meta/image/VERSION 文件中生成一个 cluster_id。 - - FE 在第一次加入集群时,会首先从 Master FE 获取这个文件。之后每次 FE 之间的重新连接(FE 重启),都会校验自身 cluster id 是否与已存在的其它 FE 的 cluster id 相同。如果不同,则该 FE 会自动退出。 - - BE 在第一次接收到 Master FE 的心跳时,会从心跳中获取到 cluster id,并记录到数据目录的 `cluster_id` 文件中。之后的每次心跳都会比对 FE 发来的 cluster id。如果 cluster id 不相等,则 BE 会拒绝响应 FE 的心跳。 - - 心跳中同时会包含 Master FE 的 ip。当 FE 切主时,新的 Master FE 会携带自身的 ip 发送心跳给 BE,BE 会更新自身保存的 Master FE 的 ip。 - - > **priority\_network** - > - > priority\_network 是 FE 和 BE 都有一个配置,其主要目的是在多网卡的情况下,协助 FE 或 BE 识别自身 ip 地址。priority\_network 采用 CIDR 表示法:[RFC 4632](https://tools.ietf.org/html/rfc4632) - > - > 当确认 FE 和 BE 连通性正常后,如果仍然出现建表 Timeout 的情况,并且 FE 的日志中有 `backend does not found. host: xxx.xxx.xxx.xxx` 字样的错误信息。则表示 Doris 自动识别的 IP 地址有问题,需要手动设置 priority\_network 参数。 - > - > 出现这个问题的主要原因是:当用户通过 `ADD BACKEND` 语句添加 BE 后,FE 会识别该语句中指定的是 hostname 还是 IP。如果是 hostname,则 FE 会自动将其转换为 IP 地址并存储到元数据中。当 BE 在汇报任务完成信息时,会携带自己的 IP 地址。而如果 FE 发现 BE 汇报的 IP 地址和元数据中不一致时,就会出现如上错误。 - > - > 这个错误的解决方法:1)分别在 FE 和 BE 设置 **priority\_network** 参数。通常 FE 和 BE 都处于一个网段,所以该参数设置为相同即可。2)在 `ADD BACKEND` 语句中直接填写 BE 正确的 IP 地址而不是 hostname,以避免 FE 获取到错误的 IP 地址。 - -5. BE 进程文件句柄数 - - BE进程文件句柄数,受min_file_descriptor_number/max_file_descriptor_number两个参数控制。 - - 如果不在[min_file_descriptor_number, max_file_descriptor_number]区间内,BE进程启动会出错,可以使用ulimit进行设置。 - - min_file_descriptor_number的默认值为65536。 - - max_file_descriptor_number的默认值为131072. 
- - 举例而言:ulimit -n 65536; 表示将文件句柄设成65536。 - - 启动BE进程之后,可以通过 cat /proc/$pid/limits 查看进程实际生效的句柄数 diff --git a/docs/zh-CN/installing/upgrade.md b/docs/zh-CN/installing/upgrade.md deleted file mode 100644 index 03cf49d727..0000000000 --- a/docs/zh-CN/installing/upgrade.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -{ - "title": "集群升级", - "language": "zh-CN" -} ---- - - - -# 集群升级 - -Doris 可以通过滚动升级的方式,平滑进行升级。建议按照以下步骤进行安全升级。 - -> **注:** -> -> 1. Doris不支持跨两位版本号进行升级,例如:不能从0.13直接升级到0.15,只能通过0.13.x -> 0.14.x -> 0.15.x,三位版本号可以跨版本升级,比如从0.13.15可以直接升级到0.14.13.1,不必一定要升级0.14.7 或者 0.14.12.1这种版本 -> 1. 以下方式均建立在高可用部署的情况下。即数据 3 副本,FE 高可用情况下。 - -## 前置工作 - -1. 关闭集群副本修复和均衡功能 - - 升级过程中会有节点重启,所以可能会触发不必要的集群均衡和副本修复逻辑。可以先通过以下命令关闭: - - ``` - # 关闭副本均衡逻辑。关闭后,不会再触发普通表副本的均衡操作。 - $ mysql-client > admin set frontend config("disable_balance" = "true"); - - # 关闭 colocation 表的副本均衡逻辑。关闭后,不会再触发 colocation 表的副本重分布操作。 - $ mysql-client > admin set frontend config("disable_colocate_balance" = "true"); - - # 关闭副本调度逻辑。关闭后,所有已产生的副本修复和均衡任务不会再被调度。 - $ mysql-client > admin set frontend config("disable_tablet_scheduler" = "true"); - ``` - - 当集群升级完毕后,在通过以上命令将对应配置设为原值即可。 - -2. **重要!!在升级之前需要备份元数据(整个目录都需要备份)!!** - -## 测试 BE 升级正确性 - -1. 任意选择一个 BE 节点,部署最新的 palo_be 二进制文件。 -2. 重启 BE 节点,通过 BE 日志 be.INFO,查看是否启动成功。 -3. 如果启动失败,可以先排查原因。如果错误不可恢复,可以直接通过 DROP BACKEND 删除该 BE、清理数据后,使用上一个版本的 palo_be 重新启动 BE。然后重新 ADD BACKEND。(**该方法会导致丢失一个数据副本,请务必确保3副本完整的情况下,执行这个操作!!!**) - -## 测试 FE 元数据兼容性 - -0. **重要!!元数据兼容性异常很可能导致数据无法恢复!!** -1. 单独使用新版本部署一个测试用的 FE 进程(比如自己本地的开发机)。 -2. 修改测试用的 FE 的配置文件 fe.conf,将所有端口设置为**与线上不同**。 -3. 在 fe.conf 添加配置:cluster_id=123456 -4. 在 fe.conf 添加配置:metadata\_failure_recovery=true -5. 拷贝线上环境 Master FE 的元数据目录 doris-meta 到测试环境 -6. 将拷贝到测试环境中的 doris-meta/image/VERSION 文件中的 cluster_id 修改为 123456(即与第3步中相同) -7. 在测试环境中,运行 sh bin/start_fe.sh 启动 FE -8. 通过 FE 日志 fe.log 观察是否启动成功。 -9. 如果启动成功,运行 sh bin/stop_fe.sh 停止测试环境的 FE 进程。 -10. **以上 2-6 步的目的是防止测试环境的FE启动后,错误连接到线上环境中。** - -## 升级准备 - -1. 在完成数据正确性验证后,将 BE 和 FE 新版本的二进制文件分发到各自目录下。 -2. 通常小版本升级,BE 只需升级 palo_be;而 FE 只需升级 palo-fe.jar。如果是大版本升级,则可能需要升级其他文件(包括但不限于 bin/ lib/ 等等)如果你不清楚是否需要替换其他文件,建议全部替换。 - -## 滚动升级 - -1. 确认新版本的文件部署完成后。逐台重启 FE 和 BE 实例即可。 -2. 建议逐台重启 BE 后,再逐台重启 FE。因为通常 Doris 保证 FE 到 BE 的向后兼容性,即老版本的 FE 可以访问新版本的 BE。但可能不支持老版本的 BE 访问新版本的 FE。 -3. 建议确认前一个实例启动成功后,再重启下一个实例。实例启动成功的标识,请参阅安装部署文档。 diff --git a/docs/zh-CN/internal/doris_storage_optimization.md b/docs/zh-CN/internal/doris_storage_optimization.md deleted file mode 100644 index f2816494e1..0000000000 --- a/docs/zh-CN/internal/doris_storage_optimization.md +++ /dev/null @@ -1,234 +0,0 @@ ---- -{ - "title": "Doris存储文件格式优化", - "language": "zh-CN" -} ---- - - - -# Doris存储文件格式优化 # - -## 文件格式 ## - -![](/images/segment_v2.png) -
-图1. doris segment文件格式
- -文件包括: -- 文件开始是8个字节的magic code,用于识别文件格式和版本 -- Data Region:用于存储各个列的数据信息,这里的数据是按需分page加载的 -- Index Region: doris中将各个列的index数据统一存储在Index Region,这里的数据会按照列粒度进行加载,所以跟列的数据信息分开存储 -- Footer信息 - - FileFooterPB:定义文件的元数据信息 - - 4个字节的footer pb内容的checksum - - 4个字节的FileFooterPB消息长度,用于读取FileFooterPB - - 8个字节的MAGIC CODE,之所以在末位存储,是方便不同的场景进行文件类型的识别 - -文件中的数据按照page的方式进行组织,page是编码和压缩的基本单位。现在的page类型包括以下几种: - -### DataPage ### - -DataPage分为两种:nullable和non-nullable的data page。 - -nullable的data page内容包括: -``` - - +----------------+ - | value count | - |----------------| - | first row id | - |----------------| - | bitmap length | - |----------------| - | null bitmap | - |----------------| - | data | - |----------------| - | checksum | - +----------------+ -``` - -non-nullable data page结构如下: - -``` - |----------------| - | value count | - |----------------| - | first row id | - |----------------| - | data | - |----------------| - | checksum | - +----------------+ -``` - -其中各个字段含义如下: - -- value count - - 表示page中的行数 -- first row id - - page中第一行的行号 -- bitmap length - - 表示接下来bitmap的字节数 -- null bitmap - - 表示null信息的bitmap -- data - - 存储经过encoding和compress之后的数据 - - 需要在数据的头部信息中写入:is_compressed - - 各种不同编码的data需要在头部信息写入一些字段信息,以实现数据的解析 - - TODO:添加各种encoding的header信息 -- checksum - - 存储page粒度的校验和,包括page的header和之后的实际数据 - - -### Bloom Filter Pages ### - -针对每个bloom filter列,会在page的粒度相应的生成一个bloom filter的page,保存在bloom filter pages区域 - -### Ordinal Index Page ### - -针对每个列,都会按照page粒度,建立行号的稀疏索引。内容为这个page的起始行的行号到这个block的指针(包括offset和length) - -### Short Key Index page ### - -我们会每隔N行(可配置)生成一个short key的稀疏索引,索引的内容为:short key->行号(ordinal) - -### Column的其他索引 ### - -该格式设计支持后续扩展其他的索引信息,比如bitmap索引,spatial索引等等,只需要将需要的数据写到现有的列数据后面,并且添加对应的元数据字段到FileFooterPB中 - -### 元数据定义 ### -SegmentFooterPB的定义为: - -``` -message ColumnPB { - required int32 unique_id = 1; // 这里使用column id, 不使用column name是因为计划支持修改列名 - optional string name = 2; // 列的名字, 当name为__DORIS_DELETE_SIGN__, 表示该列为隐藏的删除列 - required string type = 3; // 列类型 - optional bool is_key = 4; // 是否是主键列 - optional string aggregation = 5; // 聚合方式 - optional bool is_nullable = 6; // 是否有null - optional bytes default_value = 7; // 默认值 - optional int32 precision = 8; // 精度 - optional int32 frac = 9; - optional int32 length = 10; // 长度 - optional int32 index_length = 11; // 索引长度 - optional bool is_bf_column = 12; // 是否有bf词典 - optional bool has_bitmap_index = 15 [default=false]; // 是否有bitmap索引 -} - -// page偏移 -message PagePointerPB { - required uint64 offset; // page在文件中的偏移 - required uint32 length; // page的大小 -} - -message MetadataPairPB { - optional string key = 1; - optional bytes value = 2; -} - -message ColumnMetaPB { - optional ColumnMessage encoding; // 编码方式 - - optional PagePointerPB dict_page // 词典page - repeated PagePointerPB bloom_filter_pages; // bloom filter词典信息 - optional PagePointerPB ordinal_index_page; // 行号索引数据 - optional PagePointerPB page_zone_map_page; // page级别统计信息索引数据 - - optional PagePointerPB bitmap_index_page; // bitmap索引数据 - - optional uint64 data_footprint; // 列中索引的大小 - optional uint64 index_footprint; // 列中数据的大小 - optional uint64 raw_data_footprint; // 原始列数据大小 - - optional CompressKind compress_kind; // 列的压缩方式 - - optional ZoneMapPB column_zone_map; //文件级别的过滤条件 - repeated MetadataPairPB column_meta_datas; -} - -message SegmentFooterPB { - optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用 - repeated ColumnPB schema = 5; // 列Schema - optional uint64 num_values = 4; // 文件中保存的行数 - optional uint64 index_footprint = 7; // 索引大小 - optional uint64 data_footprint = 8; // 数据大小 - optional 
uint64 raw_data_footprint = 8; // 原始数据大小 - - optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式 - repeated ColumnMetaPB column_metas = 10; // 列元数据 - optional PagePointerPB key_index_page; // short key索引page -} - -``` - -## 读写逻辑 ## - -### 写入 ### - -大体的写入流程如下: -1. 写入magic -2. 根据schema信息,生成对应的ColumnWriter,每个ColumnWriter按照不同的类型,获取对应的encoding信息(可配置),根据encoding,生成对应的encoder -3. 调用encoder->add(value)进行数据写入,每个K行,生成一个short key index entry,并且,如果当前的page满足一定条件(大小超过1M或者行数为K),就生成一个新的page,缓存在内存中。 -4. 不断的循环步骤3,直到数据写入完成。将各个列的数据依序刷入文件中 -5. 生成FileFooterPB信息,写入文件中。 - -相关的问题: - -- short key的索引如何生成? - - 现在还是按照每隔多少行生成一个short key的稀疏索引,保持每隔1024行生成一个short的稀疏索引,具体的内容是:short key -> ordinal - -- ordinal索引里面应该存什么? - - 存储page的第一个ordinal到page pointer的映射信息 -- 不同encoding类型的page里存什么? - - 词典压缩 - - plain - - rle - - bshuf - -### 读取 ### - -1. 读取文件的magic,判断文件类型和版本 -2. 读取FileFooterPB,进行checksum校验 -3. 按照需要的列,读取short key索引和对应列的数据ordinal索引信息 -4. 使用start key和end key,通过short key索引定位到要读取的行号,然后通过ordinal索引确定需要读取的row ranges, 同时需要通过统计信息、bitmap索引等过滤需要读取的row ranges -5. 然后按照row ranges通过ordinal索引读取行的数据 - -相关的问题: -1. 如何实现在page内部快速的定位到某一行? - - page内部是的数据是经过encoding的,无法快速进行行级数据的定位。不同的encoding方式,在内部进行快速的行号定位的方案不一样,需要具体分析: - - 如果是rle编码的,需要通过解析rle的header进行skip,直到到达包含该行的那个rle块之后,再进行反解。 - - binary plain encoding:会在page的中存储offset信息,并且会在page header中指定offset信息的offset,读取的时候会先解析offset信息到数组中,这样子就可以通过各个行的offset数据信息快速的定位block某一行的数据 -2. 如何实现块的高效读取?可以考虑将相邻的块在读取的时候进行merge,一次性读取? - 这个需要在读取的时候,判断block是否连续,如果连续,就一次性的读取 - -## 编码 ## - -现有的doris存储中,针对string类型的编码,采用plain encoding的方式,效率比较低。经过对比,发现在百度统计的场景下,数据会因为string类型的编码膨胀超过一倍。所以,计划引入基于词典的编码压缩。 - -## 压缩 ## - -实现可扩展的压缩框架,支持多种压缩算法,方便后续添加新的压缩算法,计划引入zstd压缩。 - -## TODO ## -1. 如何实现嵌套类型?如何在嵌套类型中进行行号定位? -2. 如何优化现在的ScanRange拆分导致的下游bitmap、column statistic统计等进行多次? diff --git a/docs/zh-CN/internal/flink_doris_connector_design.md b/docs/zh-CN/internal/flink_doris_connector_design.md deleted file mode 100644 index ca91982d37..0000000000 --- a/docs/zh-CN/internal/flink_doris_connector_design.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -{ - "title": "Flink Doris Connector设计方案", - "language": "zh-CN" -} - ---- - - - -# GROUPING SETS 设计文档 - -## 1. 
GROUPING SETS 相关背景知识 - -### 1.1 GROUPING SETS 子句 - -GROUP BY GROUPING SETS 是对 GROUP BY 子句的扩展,它能够在一个 GROUP BY 子句中一次实现多个集合的分组。其结果等价于将多个相应 GROUP BY 子句进行 UNION 操作。 - -特别地,一个空的子集意味着将所有的行聚集到一个分组。 -GROUP BY 子句是只含有一个元素的 GROUP BY GROUPING SETS 的特例。 - -例如,GROUPING SETS 语句: - -``` -SELECT k1, k2, SUM( k3 ) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k1), (k2), ( ) ); -``` - -其查询结果等价于: - -``` -SELECT k1, k2, SUM( k3 ) FROM t GROUP BY k1, k2 -UNION -SELECT k1, null, SUM( k3 ) FROM t GROUP BY k1 -UNION -SELECT null, k2, SUM( k3 ) FROM t GROUP BY k2 -UNION -SELECT null, null, SUM( k3 ) FROM t -``` - -下面是一个实际数据的例子: - -``` -mysql> SELECT * FROM t; -+------+------+------+ -| k1 | k2 | k3 | -+------+------+------+ -| a | A | 1 | -| a | A | 2 | -| a | B | 1 | -| a | B | 3 | -| b | A | 1 | -| b | A | 4 | -| b | B | 1 | -| b | B | 5 | -+------+------+------+ -8 rows in set (0.01 sec) - -mysql> SELECT k1, k2, SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ); -+------+------+-----------+ -| k1 | k2 | sum(`k3`) | -+------+------+-----------+ -| b | B | 6 | -| a | B | 4 | -| a | A | 3 | -| b | A | 5 | -| NULL | B | 10 | -| NULL | A | 8 | -| a | NULL | 7 | -| b | NULL | 11 | -| NULL | NULL | 18 | -+------+------+-----------+ -9 rows in set (0.06 sec) -``` - -### 1.2 ROLLUP 子句 - -ROLLUP 是对 GROUPING SETS 的扩展。 - -``` -SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY ROLLUP(a,b,c) -``` - -这个 ROLLUP 等价于下面的 GROUPING SETS: - -``` -GROUPING SETS ( -(a,b,c), -( a, b ), -( a), -( ) -) -``` - -### 1.3 CUBE 子句 - -CUBE 也是对 GROUPING SETS 的扩展。 - -``` -CUBE ( e1, e2, e3, ... ) -``` - -其含义是 GROUPING SETS 后面列表中的所有子集。 - -例如,CUBE ( a, b, c ) 等价于下面的 GROUPING SETS: - -``` -GROUPING SETS ( -( a, b, c ), -( a, b ), -( a, c ), -( a ), -( b, c ), -( b ), -( c ), -( ) -) -``` - -### 1.4 GROUPING 和 GROUPING_ID 函数 -当我们没有统计某一列时,它的值显示为 NULL,这也可能是列本身就有 NULL 值,这就需要一种方法区分是没有统计还是值本来就是 NULL。为此引入 GROUPING 和 GROUPING_ID 函数。 -GROUPING(column:Column) 函数用于区分分组后的单个列是普通列和聚合列。如果是聚合列,则返回1,反之,则是0. GROUPING() 只能有一个参数列。 - -GROUPING_ID(column1, column2) 则根据指定的column 顺序,否则根据聚合的时候给的集合的元素顺序,计算出一个列列表的 bitmap 值,一个列如果是聚合列为0,否则为1. 
GROUPING_ID()函数返回位向量的十进制值。 -比如 [0 1 0] ->2 从下列第三个查询可以看到这种对应关系 - -例如,对于下面的表: - -``` -mysql> select * from t; -+------+------+------+ -| k1 | k2 | k3 | -+------+------+------+ -| a | A | 1 | -| a | A | 2 | -| a | B | 1 | -| a | B | 3 | -| b | A | 1 | -| b | A | 4 | -| b | B | 1 | -| b | B | 5 | -+------+------+------+ -``` - -grouping sets 的结果如下: - -``` -mysql> SELECT k1, k2, GROUPING(k1), GROUPING(k2), SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ); -+------+------+----------------+----------------+-----------+ -| k1 | k2 | grouping(`k1`) | grouping(`k2`) | sum(`k3`) | -+------+------+----------------+----------------+-----------+ -| a | A | 0 | 0 | 3 | -| a | B | 0 | 0 | 4 | -| a | NULL | 0 | 1 | 7 | -| b | A | 0 | 0 | 5 | -| b | B | 0 | 0 | 6 | -| b | NULL | 0 | 1 | 11 | -| NULL | A | 1 | 0 | 8 | -| NULL | B | 1 | 0 | 10 | -| NULL | NULL | 1 | 1 | 18 | -+------+------+----------------+----------------+-----------+ -9 rows in set (0.02 sec) - -mysql> SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ); -+------+------+-------------------------+-----------+ -| k1 | k2 | grouping_id(`k1`, `k2`) | sum(`k3`) | -+------+------+-------------------------+-----------+ -| a | A | 0 | 3 | -| a | B | 0 | 4 | -| a | NULL | 1 | 7 | -| b | A | 0 | 5 | -| b | B | 0 | 6 | -| b | NULL | 1 | 11 | -| NULL | A | 2 | 8 | -| NULL | B | 2 | 10 | -| NULL | NULL | 3 | 18 | -+------+------+-------------------------+-----------+ -9 rows in set (0.02 sec) - -mysql> SELECT k1, k2, grouping(k1), grouping(k2), GROUPING_ID(k1,k2), SUM(k4) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ) order by k1, k2; -+------+------+----------------+----------------+-------------------------+-----------+ -| k1 | k2 | grouping(`k1`) | grouping(`k2`) | grouping_id(`k1`, `k2`) | sum(`k4`) | -+------+------+----------------+----------------+-------------------------+-----------+ -| a | A | 0 | 0 | 0 | 3 | -| a | B | 0 | 0 | 0 | 4 | -| a | NULL | 0 | 1 | 1 | 7 | -| b | A | 0 | 0 | 0 | 5 | -| b | B | 0 | 0 | 0 | 6 | -| b | NULL | 0 | 1 | 1 | 11 | -| NULL | A | 1 | 0 | 2 | 8 | -| NULL | B | 1 | 0 | 2 | 10 | -| NULL | NULL | 1 | 1 | 3 | 18 | -+------+------+----------------+----------------+-------------------------+-----------+ -9 rows in set (0.02 sec) - -``` - -### 1.5 GROUPING SETS 的组合与嵌套 - -首先,一个 GROUP BY 子句本质上是一个 GROUPING SETS 的特例, 例如: - -``` - GROUP BY a -等同于 - GROUP BY GROUPING SETS((a)) -同样地, - GROUP BY a,b,c -等同于 - GROUP BY GROUPING SETS((a,b,c)) -``` - -同样的,CUBE 和 ROLLUP 也可以展开成 GROUPING SETS,因此 GROUP BY, CUBE, ROLLUP, GROUPING SETS 的各种组合和嵌套本质上就是 GROUPING SETS 的组合与嵌套。 - -对于 GROUPING SETS 的嵌套,语义上等价于将嵌套内的语句直接写到外面。(参考:),其中写道: - -``` -The CUBE and ROLLUP constructs can be used either directly in the GROUP BY clause, or nested inside a GROUPING SETS clause. If one GROUPING SETS clause is nested inside another, the effect is the same as if all the elements of the inner clause had been written directly in the outer clause. -``` - -对于多个 GROUPING SETS 的组合列表,很多数据库认为是叉乘(cross product)的关系。 - -例如: - -``` -GROUP BY a, CUBE (b, c), GROUPING SETS ((d), (e)) - -等同于: - -GROUP BY GROUPING SETS ( -(a, b, c, d), (a, b, c, e), -(a, b, d), (a, b, e), -(a, c, d), (a, c, e), -(a, d), (a, e) -) -``` - -对于 GROUPING SETS 的组合与嵌套,各个数据库支持不太一样。例如 snowflake 不支持任何的组合和嵌套。 -() - -Oracle 既支持组合,也支持嵌套。 -() - -Presto 支持组合,但不支持嵌套。 -() - -## 2. 设计目标 - -从语法上支持 GROUPING SETS, ROLLUP 和 CUBE。实现上述所述的1.1, 1.2, 1.3 1.4. 
- -对于1.6 GROUPING SETS 的组合与嵌套 先不实现。 - -具体语法列出如下: - -### 2.1 GROUPING SETS 语法 - -``` -SELECT ... -FROM ... -[ ... ] -GROUP BY GROUPING SETS ( groupSet [ , groupSet [ , ... ] ] ) -[ ... ] - -groupSet ::= { ( expr [ , expr [ , ... ] ] )} - - -各种表达式,包括列名. - -``` - -### 2.2 ROLLUP 语法 - -``` -SELECT ... -FROM ... -[ ... ] -GROUP BY ROLLUP ( expr [ , expr [ , ... ] ] ) -[ ... ] - - -各种表达式,包括列名. - -``` - -### 2.3 CUBE 语法 - -``` -SELECT ... -FROM ... -[ ... ] -GROUP BY CUBE ( expr [ , expr [ , ... ] ] ) -[ ... ] - - -各种表达式,包括列名. - -``` - -## 3. 实现方案 - -### 3.1 整体思路 - -既然 GROUPING SET 子句逻辑上等价于多个相应 GROUP BY 子句的 UNION,可以通过扩展输入行(此输入行已经是通过下推条件过滤和投影后的), 在此基础上进行一个单一的 GROUP BY 操作来达到目的。 - -关键是怎样扩展输入行呢?下面举例说明: - -例如,对应下面的语句: - -``` -SELECT a, b FROM src GROUP BY a, b GROUPING SETS ((a, b), (a), (b), ()); - -``` - -假定 src 表的数据如下: - -``` -1, 2 -3, 4 - -``` - -根据 GROUPING SETS 子句给出的列表,可以将输入行扩展为下面的 8 行 (GROUPING SETS集合数 * 行数, 同时为每行生成对应的 全列的GROUPING_ID: 和其他grouping 函数的值 - -``` -1, 2 (GROUPING_ID: a, b -> 00->0) -1, null (GUPING_ID: a, null -> 01 -> 1) -null, 2 (GROUPING_ID: null, b -> 10 -> 2) -null, null (GROUPING_ID: null, null -> 11 -> 3) - -3, 4 (GROUPING_ID: a, b -> 00 -> 0) -3, null (GROUPING_ID: a, null -> 01 -> 1) -null, 4 (GROUPING_ID: null, b -> 10 -> 2) -null, null (GROUPING_ID: null, null -> 11 -> 3) - -``` - -然后,将上面的 8 行数据作为输入,对 a, b, GROUPING_ID 进行 GROUP BY 操作即可。 - -### 3.2 具体例子验证说明 - -假设有一个 t 表,包含如下列和数据: - -``` -mysql> select * from t; -+------+------+------+ -| k1 | k2 | k3 | -+------+------+------+ -| a | A | 1 | -| a | A | 2 | -| a | B | 1 | -| a | B | 3 | -| b | A | 1 | -| b | A | 4 | -| b | B | 1 | -| b | B | 5 | -+------+------+------+ -8 rows in set (0.01 sec) - -``` - -对于如下的查询: - -``` -SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ((k1, k2), (k1), (k2), ()); - -``` - -首先,对输入行进行扩展,每行数据扩展成 4 行 (GROUPING SETS子句的集合数目),同时增加 GROUPING_ID() 列 : - -例如 a, A, 1 扩展后变成下面的 4 行: - -``` -+------+------+------+-------------------------+ -| k1 | k2 | k3 | GROUPING_ID(`k1`, `k2`) | -+------+------+------+-------------------------+ -| a | A | 1 | 0 | -| a | NULL | 1 | 1 | -| NULL | A | 1 | 2 | -| NULL | NULL | 1 | 3 | -+------+------+------+-------------------------+ - -``` - -最终, 全部扩展后的输入行如下(总共 32 行): - -``` -+------+------+------+-------------------------+ -| k1 | k2 | k3 | GROUPING_ID(`k1`, `k2`) | -+------+------+------+-------------------------+ -| a | A | 1 | 0 | -| a | A | 2 | 0 | -| a | B | 1 | 0 | -| a | B | 3 | 0 | -| b | A | 1 | 0 | -| b | A | 4 | 0 | -| b | B | 1 | 0 | -| b | B | 5 | 0 | -| a | NULL | 1 | 1 | -| a | NULL | 1 | 1 | -| a | NULL | 2 | 1 | -| a | NULL | 3 | 1 | -| b | NULL | 1 | 1 | -| b | NULL | 1 | 1 | -| b | NULL | 4 | 1 | -| b | NULL | 5 | 1 | -| NULL | A | 1 | 2 | -| NULL | A | 1 | 2 | -| NULL | A | 2 | 2 | -| NULL | A | 4 | 2 | -| NULL | B | 1 | 2 | -| NULL | B | 1 | 2 | -| NULL | B | 3 | 2 | -| NULL | B | 5 | 2 | -| NULL | NULL | 1 | 3 | -| NULL | NULL | 1 | 3 | -| NULL | NULL | 1 | 3 | -| NULL | NULL | 1 | 3 | -| NULL | NULL | 2 | 3 | -| NULL | NULL | 3 | 3 | -| NULL | NULL | 4 | 3 | -| NULL | NULL | 5 | 3 | -+------+------+------+-------------------------+ -32 rows in set. 
- -``` - -现在对k1, k2, GROUPING_ID(`k1`, `k2`) 进行 GROUP BY: - -``` -+------+------+-------------------------+-----------+ -| k1 | k2 | grouping_id(`k1`, `k2`) | sum(`k3`) | -+------+------+-------------------------+-----------+ -| a | A | 0 | 3 | -| a | B | 0 | 4 | -| a | NULL | 1 | 7 | -| b | A | 0 | 5 | -| b | B | 0 | 6 | -| b | NULL | 1 | 11 | -| NULL | A | 2 | 8 | -| NULL | B | 2 | 10 | -| NULL | NULL | 3 | 18 | -+------+------+-------------------------+-----------+ -9 rows in set (0.02 sec) - -``` - -可以看到,其结果与对 GROUPING SETS 子句后每个子集进行 GROUP BY 后再进行 UNION 的结果一致。 - -``` -select k1, k2, sum(k3) from t group by k1, k2 -UNION ALL -select NULL, k2, sum(k3) from t group by k2 -UNION ALL -select k1, NULL, sum(k3) from t group by k1 -UNION ALL -select NULL, NULL, sum(k3) from t; - -+------+------+-----------+ -| k1 | k2 | sum(`k3`) | -+------+------+-----------+ -| b | B | 6 | -| b | A | 5 | -| a | A | 3 | -| a | B | 4 | -| a | NULL | 7 | -| b | NULL | 11 | -| NULL | B | 10 | -| NULL | A | 8 | -| NULL | NULL | 18 | -+------+------+-----------+ -9 rows in set (0.06 sec) - -``` - -### 3.3 FE 规划阶段 - -#### 3.3.1 主要任务 - -1. 引入 GroupByClause 类,封装 Group By 相关信息,替换原有的 groupingExprs. -2. 增加 Grouping Sets, Cube 和 RollUp 的语法支持和语法检查、错误处理和错误信息; -3. 在 SelectStmt 类中增加 GroupByClause 成员; -4. 引入 GroupingFunctionCallExpr 类,封装grouping 和grouping_id 函数调用 -5. 引入 VirtualSlot 类,封装grouping,grouping_id 生成的虚拟列和实际列的对应关系 -6. 增加虚拟列 GROUPING_ID 和其他grouping,grouping_id 函数对应的虚拟列,并将此列加入到原有的 groupingExprs 表达式列表中; -7. 增加一个 PlanNode,考虑更通用的功能,命名为 RepeatNode。对于 GroupingSets 的聚合,在执行计划中插入 RepeatNode。 - -#### 3.3.2 Tuple - -在 GroupByClause 类中为了将 GROUPING_ID 加到 groupingExprs 表达式列表中,需要创建 virtual SlotRef, 相应的,需要对这个 slot 创建一个 tuple, 叫 GROUPING_ID Tuple。 - -对于 RepeatNode 这个执行计划,其输入是子节点的所有 tuple, 输出的 tuple 除了 repeat 子节点的数据外,还需要填写 GROUPING_ID 和其他grouping,grouping_id 对应的虚拟列,因此。 - - -### 3.4 BE 查询执行阶段 - -主要任务: - -1. 通过 RepeatNode 的执行类,增加扩展输入行的逻辑,其功能是在聚合之前将原有数据进行 repeat:对每行增加一列 GROUPING_ID, 然后按照 GroupingSets 中的集合数进行 repeat,并对对应列置为 null。根据grouping list设置新增虚拟列的值 -2. 实现 grouping_id() 和grouping() 函数。 - - - - diff --git a/docs/zh-CN/internal/metadata-design.md b/docs/zh-CN/internal/metadata-design.md deleted file mode 100644 index a1cdd29ee2..0000000000 --- a/docs/zh-CN/internal/metadata-design.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -{ - "title": "元数据设计文档", - "language": "zh-CN" -} ---- - - - -# 元数据设计文档 - -## 名词解释 - -* FE:Frontend,即 Doris 的前端节点。主要负责接收和返回客户端请求、元数据以及集群管理、查询计划生成等工作。 -* BE:Backend,即 Doris 的后端节点。主要负责数据存储与管理、查询计划执行等工作。 -* bdbje:[Oracle Berkeley DB Java Edition](http://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html)。在 Doris 中,我们使用 bdbje 完成元数据操作日志的持久化、FE 高可用等功能。 - -## 整体架构 -![](/images/palo_architecture.jpg) - -如上图,Doris 的整体架构分为两层。多个 FE 组成第一层,提供 FE 的横向扩展和高可用。多个 BE 组成第二层,负责数据存储与管理。本文主要介绍 FE 这一层中,元数据的设计与实现方式。 - -1. FE 节点分为 follower 和 observer 两类。各个 FE 之间,通过 bdbje([BerkeleyDB Java Edition](http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/overview/index-093405.html))进行 leader 选举,数据同步等工作。 - -2. follower 节点通过选举,其中一个 follower 成为 leader 节点,负责元数据的写入操作。当 leader 节点宕机后,其他 follower 节点会重新选举出一个 leader,保证服务的高可用。 - -3. 
diff --git a/docs/zh-CN/internal/metadata-design.md b/docs/zh-CN/internal/metadata-design.md
deleted file mode 100644
index a1cdd29ee2..0000000000
--- a/docs/zh-CN/internal/metadata-design.md
+++ /dev/null
@@ -1,126 +0,0 @@
----
-{
-    "title": "Metadata Design",
-    "language": "zh-CN"
-}
----
-
-
-# Metadata Design
-
-## Glossary
-
-* FE: Frontend, the frontend node of Doris. It is responsible for receiving and returning client requests, metadata and cluster management, query plan generation, and so on.
-* BE: Backend, the backend node of Doris. It is responsible for data storage and management, query plan execution, and so on.
-* bdbje: [Oracle Berkeley DB Java Edition](http://www.oracle.com/technetwork/database/berkeleydb/overview/index-093405.html). In Doris, bdbje is used to persist the metadata operation log and to provide FE high availability, among other things.
-
-## Overall Architecture
-![](/images/palo_architecture.jpg)
-
-As shown above, the overall architecture of Doris has two tiers. Multiple FEs form the first tier and provide horizontal scaling and high availability for the FE; multiple BEs form the second tier and are responsible for data storage and management. This document describes the design and implementation of metadata in the FE tier.
-
-1. FE nodes are divided into two kinds, follower and observer. The FEs elect a leader and synchronize data among themselves through bdbje ([BerkeleyDB Java Edition](http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/overview/index-093405.html)).
-
-2. Through election, one of the followers becomes the leader node, which is responsible for metadata writes. When the leader goes down, the remaining followers elect a new leader, keeping the service highly available.
-
-3. Observer nodes only synchronize metadata from the leader and do not take part in the election. They can be scaled out horizontally to scale the metadata read service.
-
-> Note: follower and observer correspond to replica and observer in bdbje. Both sets of names may be used below.
-
-## Metadata Structure
-
-Doris metadata is kept entirely in memory. Every FE maintains a complete metadata image in memory. Inside Baidu, a cluster with 2,500 tables and 1 million tablets (3 million replicas) uses only about 2 GB of memory for metadata. (Of course, the memory used by intermediate query objects, job information and so on has to be estimated for the actual workload, but overall it stays within a fairly low budget.)
-
-In memory, the metadata is stored as a tree-like hierarchy, and auxiliary structures are added so that metadata at every level can be accessed quickly.
-
-The figure below shows what is stored in the Doris meta information.
-
-![](/images/metadata_contents.png)
-
-As shown above, Doris metadata mainly stores four kinds of data:
-
-1. User data information, including databases, table schemas, tablet information, and so on.
-2. Information on all kinds of jobs, such as import jobs, Clone jobs, SchemaChange jobs, and so on.
-3. User and privilege information.
-4. Cluster and node information.
-
-## Data Flow
-
-![](/images/metadata_stream.png)
-
-The metadata data flow is as follows (a sketch of the log id bookkeeping between leader and non-leader follows this list):
-
-1. Only the leader FE can write metadata. After modifying the leader's memory, a write operation is serialized into a log and written to bdbje as a key-value pair. The key is a consecutive integer used as the log id, and the value is the serialized operation log.
-
-2. After the log is written to bdbje, bdbje replicates it to the other non-leader FE nodes according to the configured policy (write majority / write all). The non-leader FEs replay the log, modify their own in-memory metadata image, and thereby stay in sync with the leader.
-
-3. When the number of log entries on the leader reaches a threshold (100,000 by default), a checkpoint thread starts. The checkpoint reads the existing image file plus the logs after it, and replays a fresh copy of the metadata image in memory. The copy is then written to disk as a new image. The reason for rebuilding a copy instead of dumping the existing image is that dumping it would hold a read lock and block writes, so each checkpoint temporarily uses twice the memory.
-
-4. After the image file is generated, the leader notifies the other non-leader nodes that a new image is ready. The non-leaders then pull the latest image file over http to replace their old local file.
-
-5. After the image has been made, the old logs in bdbje are deleted periodically.
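-
-The following Java sketch illustrates the log id bookkeeping described above: the leader assigns consecutive ids when appending to the journal, and a non-leader replays entries in order and remembers the highest id it has applied. Names such as EditLogStore are hypothetical; this is only an in-memory stand-in, not the actual Doris journal code built on bdbje.
-
-```
-import java.util.Map;
-import java.util.concurrent.ConcurrentSkipListMap;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.function.Consumer;
-
-// Hypothetical in-memory stand-in for the edit-log journal: the leader appends
-// entries under consecutive integer ids, and followers replay them in order.
-class EditLogStore {
-    private final AtomicLong nextId = new AtomicLong(1);
-    private final Map<Long, byte[]> entries = new ConcurrentSkipListMap<>();
-
-    // Leader side: serialize one metadata operation and append it.
-    long append(byte[] serializedOp) {
-        long id = nextId.getAndIncrement();
-        entries.put(id, serializedOp);
-        return id;                                 // reported back as the latest log id
-    }
-
-    // Follower side: replay everything after the last applied id onto the image.
-    long replayFrom(long lastAppliedId, Consumer<byte[]> applyToImage) {
-        long applied = lastAppliedId;
-        for (Map.Entry<Long, byte[]> e : entries.entrySet()) {
-            if (e.getKey() > applied) {
-                applyToImage.accept(e.getValue()); // mutate the in-memory metadata image
-                applied = e.getKey();
-            }
-        }
-        return applied;                            // highest log id replayed so far
-    }
-}
-```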
-
-## Implementation Details
-
-### Metadata Directory
-
-1. The metadata directory is specified by the FE configuration item `meta_dir`.
-
-2. The `bdb/` directory holds the bdbje data.
-
-3. The `image/` directory holds the image files.
-
-    * `image.[logid]` is the latest image file. The suffix `logid` is the id of the last log entry contained in the image.
-    * `image.ckpt` is the image file currently being written. If it is written successfully, it is renamed to `image.[logid]` and replaces the old image file.
-    * The `VERSION` file records the `cluster_id`, which uniquely identifies a Doris cluster. It is a 32-bit integer randomly generated when the leader starts for the first time; a cluster id can also be specified through the FE configuration item `cluster_id`.
-    * The `ROLE` file records the role of this FE. Only `FOLLOWER` and `OBSERVER` exist, where `FOLLOWER` means the FE is an electable node. (Note: even the leader node's role is `FOLLOWER`.)
-
-### Startup Flow
-
-1. When an FE starts for the first time and the startup script is given no arguments, it tries to start as the leader. The FE startup log will eventually show `transfer from UNKNOWN to MASTER`.
-
-2. When an FE starts for the first time with the `-helper` parameter pointing to the correct leader FE, the FE first asks the leader over http for its own role (i.e. ROLE) and cluster_id, then pulls the latest image file. After reading the image file and building the metadata image, it starts bdbje and begins bdbje log synchronization. Once synchronization finishes, it replays the bdbje logs that come after the image file and completes the final metadata image.
-
-   > Note 1: When starting with `-helper`, the FE must first be added through the leader with the mysql command; otherwise startup fails with an error.
-
-   > Note 2: `-helper` can point to any follower node, not necessarily the leader.
-
-   > Note 3: While bdbje is synchronizing logs, the fe log shows `xxx detached`. The log pull is in progress and this is normal.
-
-3. When an FE that has started before is started without arguments, it determines its identity from the locally stored ROLE information and obtains the leader information from the cluster information stored in its local bdbje. It then reads the local image file and the logs in bdbje to complete the metadata image. (If the role recorded in the local ROLE file differs from the one recorded in bdbje, an error is reported.)
-
-4. When an FE that has started before is started with the `-helper` parameter, the flow is the same as the first startup: it also asks the leader for its role first, but compares it with the locally stored ROLE and reports an error if they differ.
-
-#### Metadata Reads, Writes and Synchronization
-
-1. Users can connect to any FE node with mysql to read and write metadata. If the connection goes to a non-leader node, that node forwards write operations to the leader. After a successful write, the leader returns its current latest log id. The non-leader then waits until its own replayed log id has caught up with the returned one before reporting success to the client. This guarantees Read-Your-Write semantics on every FE node.
-
-   > Note: Some non-write operations are also forwarded to the leader, for example `SHOW LOAD`. Such commands usually need to read intermediate job states that are not written to bdbje, so the memory of a non-leader does not contain them. (Metadata synchronization between FEs relies entirely on bdbje log replay; if a metadata change is not written to the bdbje log, its result is not visible on the other non-leader nodes.)
-
-2. The leader node starts a TimePrinter thread that periodically writes a key-value entry with the current time into bdbje. The other non-leader nodes replay this entry, compare the recorded time with their local time, and if the lag exceeds the configured threshold (configuration item `meta_delay_toleration_second`; the write interval is half of this value), the node switches to an **unreadable** state. This mechanism prevents a non-leader that has lost contact with the leader for a long time from serving stale metadata.
-
-3. The metadata of the FEs is only eventually consistent. Normally the inconsistency window is only a few milliseconds. Monotonic consistency of metadata access is guaranteed within a single session, but if the same client connects to different FEs, metadata may appear to move backwards. (For a batch-update system the impact is minimal.)
-
-### Failure Recovery
-
-1. When the leader node goes down, the remaining followers immediately elect a new leader to provide service.
-2. When a majority of the follower nodes are down, metadata can no longer be written. While metadata is unwritable, the current handling of a write request is that the **FE process exits directly**. This logic will be improved later so that read service is still provided in the unwritable state.
-3. An observer node going down does not affect the state of any other node, nor does it affect metadata reads and writes on the other nodes.
diff --git a/docs/zh-CN/internal/spark_load.md b/docs/zh-CN/internal/spark_load.md
deleted file mode 100644
index 654d415862..0000000000
--- a/docs/zh-CN/internal/spark_load.md
+++ /dev/null
@@ -1,212 +0,0 @@
----
-{
-    "title": "Doris Spark Load Design",
-    "language": "zh-CN"
-}
----
-
-
-# Doris Spark Load Design
-
-## Background
-
-Doris currently supports several import methods such as Broker load / routine load / stream load / mini batch load.
-Spark load mainly targets the initial-migration scenario, where a large amount of data has to be moved into Doris, and aims to speed up data import.
-
-## Glossary
-
-* FE: Frontend, the frontend node of Palo. It is responsible for receiving and returning client requests, metadata and cluster management, query plan generation, and so on.
-* BE: Backend, the backend node of Palo. It is responsible for data storage and management, query plan execution, and so on.
-* Tablet: a horizontal shard of a Palo table is called a tablet.
-* Dpp: Data preprocessing. A module that preprocesses the data to be imported using external compute resources (Hadoop, Spark), including transformation, cleansing, partitioning, sorting and aggregation.
-
-## Design
-
-### Goals
-
-Among the existing import methods in Doris, batch import of data above the hundreds-of-GB level is not well supported: many configurations have to be tuned, the import may still fail, performance is rather slow, and, since there is no read/write separation, it consumes a lot of CPU and other resources. Users hit exactly this kind of large import when migrating, so an import path based on a Spark cluster is needed. It uses the parallelism of the Spark cluster to perform the ETL work of the import (sorting, aggregation and so on), satisfies large-volume import needs, and reduces import time and migration cost.
-
-Spark import must account for multiple Spark deployment modes, so the design has to be compatible with several of them; implementing the yarn cluster mode first is a reasonable start. Also, because user data comes in many formats, data files in csv, parquet, orc and other formats need to be supported.
-
-### Implementation
-
-Before presenting the design of Spark import, it is worth recalling the existing import framework; see "Doris Broker Import Implementation Analysis" for the current framework.
-
-#### Option 1
-
-Referring to the existing import framework and the original Hadoop import used with Baidu's internal Hadoop clusters, and in order to reuse the existing import framework as much as possible and lower the development cost, the overall flow is as follows:
-
-After syntax and semantic analysis, the user's import statement produces a LoadStmt. An isSparkLoad flag is added to LoadStmt; if it is true, a SparkLoadJob is created. Similar to BrokerLoadJob, the job is driven by a state machine: in PENDING a SparkLoadPendingTask is created, and in the LOADING phase a LoadLoadingTask is still created to import the data. In the BE, the existing plan execution framework is reused to execute the import plan.
-
-Implementing Spark import mainly requires considering the following points:
-
-##### Syntax
-
-This is mostly about user habits: the import statement should stay as close as possible to the broker load statement. A candidate is:
-
-```
-    LOAD LABEL example_db.label1
-    (
-    DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file")
-    NEGATIVE
-    INTO TABLE `my_table`
-    PARTITION (p1, p2)
-    COLUMNS TERMINATED BY ","
-    columns(k1,k2,k3,v1,v2)
-    set (
-        v3 = v1 + v2,
-        k4 = hll_hash(k2)
-    )
-    where k1 > 20
-    )
-    with spark.cluster_name
-    PROPERTIES
-    (
-    "spark.master" = "yarn",
-    "spark.executor.cores" = "5",
-    "spark.executor.memory" = "10g",
-    "yarn.resourcemanager.address" = "xxx.tc:8032",
-    "max_filter_ratio" = "0.1"
-    );
-```
-Here spark.cluster_name is the name of the Spark cluster used for the import. It can be set through SET PROPERTY, similar to how the original Hadoop clusters were configured.
-The Spark cluster settings in PROPERTIES override the corresponding settings of spark.cluster_name.
-The properties mean the following:
-- spark.master: the Spark deployment mode, which can be yarn/standalone/local/k8s. The plan is to support yarn first, using yarn-cluster mode (yarn-client mode is generally for interactive scenarios).
-- spark.executor.cores: the number of CPU cores per executor.
-- spark.executor.memory: the memory size per executor.
-- yarn.resourcemanager.address: the address of the yarn resourcemanager.
-- max_filter_ratio: the maximum tolerated filter ratio.
-
-##### SparkLoadJob
-
-After the user's spark load statement is parsed, a SparkLoadJob is created:
-
-```
-SparkLoadJob:
-                 +-------+-------+
-                 |    PENDING    |-----------------|
-                 +-------+-------+                 |
-                         | SparkLoadPendingTask    |
-                         v                         |
-                 +-------+-------+                 |
-                 |    LOADING    |-----------------|
-                 +-------+-------+                 |
-                         | LoadLoadingTask         |
-                         v                         |
-                 +-------+-------+                 |
-                 |   COMMITTED   |-----------------|
-                 +-------+-------+                 |
-                         |                         |
-                         v                         v
-                 +-------+-------+         +-------+-------+
-                 |    FINISHED   |         |   CANCELLED   |
-                 +-------+-------+         +-------+-------+
-                         |                         Λ
-                         +-------------------------+
-```
-The figure above shows the execution flow of SparkLoadJob.
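-
-To make the state machine above concrete, here is a minimal Java sketch of the transitions it describes. The enum and method names are hypothetical illustrations, not the actual SparkLoadJob code, and the reading of the diagram's bottom edge (CANCELLED and FINISHED as terminal states) is an assumption.
-
-```
-// Hypothetical sketch of the state transitions drawn above.
-public class SparkLoadStateMachine {
-
-    enum State { PENDING, LOADING, COMMITTED, FINISHED, CANCELLED }
-
-    private State state = State.PENDING;
-
-    // Advances along the happy path when the task of the current phase finishes.
-    synchronized void onTaskFinished() {
-        switch (state) {
-            case PENDING:   state = State.LOADING;   break; // SparkLoadPendingTask done (etl finished)
-            case LOADING:   state = State.COMMITTED; break; // LoadLoadingTask done (data loaded)
-            case COMMITTED: state = State.FINISHED;  break; // transaction becomes visible
-            default: throw new IllegalStateException("no further transition from " + state);
-        }
-    }
-
-    // Any non-terminal state can move to CANCELLED (assumption based on the diagram).
-    synchronized void onCancel() {
-        if (state == State.FINISHED || state == State.CANCELLED) {
-            throw new IllegalStateException("job already terminal: " + state);
-        }
-        state = State.CANCELLED;
-    }
-
-    synchronized State current() { return state; }
-}
-```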
-
-##### SparkLoadPendingTask
-
-SparkLoadPendingTask is mainly responsible for submitting the spark etl job to the Spark cluster. Since Spark supports different deployment modes (localhost, standalone, yarn, k8s), a common interface SparkEtlJob needs to be abstracted to implement the SparkEtl functionality. Its main methods are:
-- submitting a spark etl job;
-- cancelling a spark etl job;
-- getting the status of a spark etl job.
-
-The rough interface is:
-```
-class SparkEtlJob {
-    // Submit the spark etl job.
-    // Returns the job id.
-    String submitJob(TBrokerScanRangeParams params);
-
-    // Cancel the job, used to support user-initiated cancellation of an import job.
-    boolean cancelJob(String jobId);
-
-    // Get the job status, used to decide whether the job has finished.
-    JobStatus getJobStatus(String jobId);
-private:
-    std::list data_descriptions;
-};
-```
-Different subclasses can be implemented to support different cluster deployment modes; for example, a SparkEtlJobForYarn can support spark import jobs on yarn clusters. Concretely, the JobId in the interface above is the appid of the Yarn application. How is the appid obtained? One option is to submit the spark job through the spark-submit client and then analyze the standard error output, extracting the appid by text matching.
-
-The experience with hadoop dpp jobs applies here: because of data volume, cluster queues and other reasons, a job may hit the limit on the number of concurrent import jobs, causing later submissions to fail, so job pile-up has to be considered. One option is a separate concurrency limit for spark load jobs, plus a per-user concurrency limit, so that jobs of different users do not interfere with each other, improving the user experience.
-
-The work done in the spark job covers the following key points:
-1. Type conversion (extraction/transformation)
-   - Convert the source file fields into the concrete column types (check whether fields are valid, perform function computation, and so on).
-2. Function computation (transformation), including negative load computation
-   - Perform the column functions specified by the user. Function list: "strftime", "time_format", "alignment_timestamp", "default_value", "md5sum", "replace_value", "now", "hll_hash", "substitute".
-3. Extraction of columns from path.
-4. Filtering with the where condition.
-5. Partitioning and bucketing.
-6. Sorting and pre-aggregation
-   - Since sorting and aggregation are performed during OlapTableSink, they are logically not required here, but doing them in the spark etl job makes the import on the BE side more efficient. **If sorting and aggregation are done in the spark etl job, the BE can skip this step during import.** This can be adjusted based on later testing; for now, sorting in the etl job looks reasonable.
-   Another thing to consider is how to support the global dictionary for the bitmap type; bitmap columns over string types depend on a global dictionary.
-   To tell the downstream whether the etl job has already sorted and aggregated the data, a job.json description file can be generated when the job finishes, containing the following properties:
-
-   ```
-   {
-       "is_segment_file" : "false",
-       "is_sort" : "true",
-       "is_agg" : "true"
-   }
-   ```
-   where:
-   is_sort indicates whether the data is sorted;
-   is_agg indicates whether the data is aggregated;
-   is_segment_file indicates whether segment files were generated.
-
-7. Rollup data is currently always computed from the base table; generating rollup data based on the hierarchy between indexes should be considered as an optimization.
-
-The relatively complex part here is supporting expression computation on columns.
-
-Finally, the output files of the spark load job can be stored in csv, parquet or orc format; for storage efficiency, parquet is the suggested default.
-
-##### LoadLoadingTask
-
-LoadLoadingTask can reuse the current logic. The one place where it differs from BrokerLoadJob is that the data files processed by SparkEtlTask have already gone through column mapping, function computation, negative load, filtering, aggregation and so on, so LoadLoadingTask does not need to repeat these operations; it only needs simple column mapping and type conversion.
-
-##### BE Import Task Execution
-
-This part can fully reuse the existing import framework and should require no changes.
-
-#### Option 2
-
-Option 1 reuses the existing import framework as much as possible and can quickly deliver support for large imports. However, the data produced by the spark etl step is already split by tablet, yet the existing Broker import framework still computes partitions and buckets for the streamed data and then sends it, serialized over rpc, to the target BE machines, which costs an extra round of serialization and network IO. In Option 2, the SparkEtlJob directly generates Segment files, Doris' storage format, and the three replicas then import the files through the add_rowset interface using a clone-like mechanism. The concrete differences are (a sketch of step 4 follows this list):
-
-1. The generated files need a tabletid suffix.
-2. Add an interface protected Map> getFilePathMap() to the SparkLoadPendingTask class that returns the mapping between tablet ids and files.
-3. Add a spark_push interface to the BE rpc service that pulls the files produced by the source-side etl to the local machine (possibly reading them through a broker) and then imports the data through the add_rowset interface, similar to the clone logic.
-4. Add a new load task, SparkLoadLoadingTask. Its main job is to read the job.json file, parse its properties, pass them as rpc parameters, and call the spark_push interface to send import requests to the BEs hosting the tablets. In the BE, spark_push decides how to handle the request according to is_segment_file: if it is true, the segment files are downloaded directly and an add rowset is performed; if it is false, the pusher logic is used to import the data.
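-
-As a rough illustration of step 4, the following Java sketch shows how a loading task could read job.json and choose between the segment-file path and the pusher path on the BE side. The class and field names (EtlJobDescription, SparkLoadLoadingTaskSketch) are hypothetical and only mirror the flow described above; they are not the actual Doris classes or RPC definitions.
-
-```
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-
-// Hypothetical sketch of the flow in step 4: parse job.json, then decide whether
-// the BE can ingest segment files directly or must run the pusher logic.
-public class SparkLoadLoadingTaskSketch {
-
-    // Mirrors the job.json properties produced by the etl job.
-    static final class EtlJobDescription {
-        boolean isSegmentFile;
-        boolean isSort;
-        boolean isAgg;
-    }
-
-    // Minimal parsing of the three known keys; a real implementation would use a JSON library.
-    static EtlJobDescription parseJobJson(String path) throws IOException {
-        String text = new String(Files.readAllBytes(Paths.get(path)));
-        EtlJobDescription desc = new EtlJobDescription();
-        desc.isSegmentFile = text.contains("\"is_segment_file\" : \"true\"");
-        desc.isSort = text.contains("\"is_sort\" : \"true\"");
-        desc.isAgg = text.contains("\"is_agg\" : \"true\"");
-        return desc;
-    }
-
-    // Stand-in for issuing the spark_push request to the BE that owns the tablet.
-    static void pushToBackend(long tabletId, String filePath, EtlJobDescription desc) {
-        if (desc.isSegmentFile) {
-            // BE side: download the segment file and add it as a rowset (clone-like path).
-            System.out.printf("tablet %d: add_rowset from %s%n", tabletId, filePath);
-        } else {
-            // BE side: fall back to the pusher logic and convert the file format.
-            System.out.printf("tablet %d: pusher convert %s%n", tabletId, filePath);
-        }
-    }
-}
-```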
-
-This option moves segment file generation into the Spark cluster as well, which greatly reduces the load on the Doris cluster and should be quite efficient. However, Option 2 depends on packaging the underlying rowset and segment v2 interfaces into a standalone .so library that Spark can call to convert the data into segment files.
-
-## Summary
-
-Comparing the two options: the first requires smaller changes, but the BE repeats work that has already been done; the second can draw on the original Hadoop import framework. The plan is therefore to deliver spark load in two steps.
-
-Step 1: following Option 2, use Spark to partition, sort and aggregate the data to be imported and produce parquet files, then go through the Hadoop pusher flow and let the BE convert the format.
-
-Step 2: wrap the segment writer as a library so that Doris' underlying format is generated directly, and add an rpc interface that implements the clone-like import logic.
diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/approx_count_distinct.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/approx_count_distinct.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/approx_count_distinct.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/approx_count_distinct.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/avg.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/avg.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/avg.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/avg.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/bitmap_union.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/bitmap_union.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/bitmap_union.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/bitmap_union.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/count.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/count.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/count.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/count.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/group_concat.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/group_concat.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/group_concat.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/group_concat.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/hll_union_agg.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/hll_union_agg.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/max.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/max.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/max.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/max.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/max_by.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/max_by.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/max_by.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/max_by.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/min.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/min.md similarity index 100% rename from
docs/zh-CN/sql-reference/sql-functions/aggregate-functions/min.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/min.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/min_by.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/min_by.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/min_by.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/min_by.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/percentile.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/percentile.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/percentile.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/percentile.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/percentile_approx.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/percentile_approx.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/percentile_approx.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/percentile_approx.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/stddev.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/stddev.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/stddev.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/stddev.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/stddev_samp.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/stddev_samp.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/stddev_samp.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/stddev_samp.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/sum.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/sum.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/sum.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/sum.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/topn.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/topn.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/topn.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/topn.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/var_samp.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/var_samp.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/var_samp.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/var_samp.md diff --git a/docs/zh-CN/sql-reference/sql-functions/aggregate-functions/variance.md b/docs/zh-CN/sql-manual/sql-functions/aggregate-functions/variance.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/aggregate-functions/variance.md rename to docs/zh-CN/sql-manual/sql-functions/aggregate-functions/variance.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_and.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_and.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and.md diff --git 
a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_and_count.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and_count.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_and_count.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and_count.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_and_not.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and_not.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_and_not.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and_not.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_and_not_count.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and_not_count.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_and_not_count.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_and_not_count.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_contains.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_contains.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_contains.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_contains.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_empty.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_empty.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_empty.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_empty.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_from_string.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_from_string.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_from_string.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_from_string.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_has_all.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_has_all.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_has_all.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_has_all.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_has_any.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_has_any.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_has_any.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_has_any.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_hash.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_hash.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_hash.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_hash.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_intersect.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_intersect.md diff --git 
a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_max.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_max.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_max.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_max.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_min.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_min.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_min.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_min.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_not.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_not.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_not.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_not.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_or.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_or.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_or.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_or.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_or_count.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_or_count.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_or_count.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_or_count.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_subset_in_range.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_in_range.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_subset_in_range.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_subset_limit.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_subset_limit.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_subset_limit.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_to_string.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_to_string.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_to_string.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_to_string.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_union.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_union.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_xor.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_xor.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_xor.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_xor.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_xor_count.md 
b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_xor_count.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_xor_count.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/bitmap_xor_count.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_union_count.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/sub_bitmap.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/sub_bitmap.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/sub_bitmap.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/sub_bitmap.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/to_bitmap.md b/docs/zh-CN/sql-manual/sql-functions/bitmap-functions/to_bitmap.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitmap-functions/to_bitmap.md rename to docs/zh-CN/sql-manual/sql-functions/bitmap-functions/to_bitmap.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitwise-functions/bitand.md b/docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitand.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitwise-functions/bitand.md rename to docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitand.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitwise-functions/bitnot.md b/docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitnot.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitwise-functions/bitnot.md rename to docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitnot.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitwise-functions/bitor.md b/docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitor.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitwise-functions/bitor.md rename to docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitor.md diff --git a/docs/zh-CN/sql-reference/sql-functions/bitwise-functions/bitxor.md b/docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitxor.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/bitwise-functions/bitxor.md rename to docs/zh-CN/sql-manual/sql-functions/bitwise-functions/bitxor.md diff --git a/docs/zh-CN/sql-reference/sql-functions/cast.md b/docs/zh-CN/sql-manual/sql-functions/cast.md 
similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/cast.md rename to docs/zh-CN/sql-manual/sql-functions/cast.md diff --git a/docs/zh-CN/sql-reference/sql-functions/conditional-functions/case.md b/docs/zh-CN/sql-manual/sql-functions/conditional-functions/case.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/conditional-functions/case.md rename to docs/zh-CN/sql-manual/sql-functions/conditional-functions/case.md diff --git a/docs/zh-CN/sql-reference/sql-functions/conditional-functions/coalesce.md b/docs/zh-CN/sql-manual/sql-functions/conditional-functions/coalesce.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/conditional-functions/coalesce.md rename to docs/zh-CN/sql-manual/sql-functions/conditional-functions/coalesce.md diff --git a/docs/zh-CN/sql-reference/sql-functions/conditional-functions/if.md b/docs/zh-CN/sql-manual/sql-functions/conditional-functions/if.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/conditional-functions/if.md rename to docs/zh-CN/sql-manual/sql-functions/conditional-functions/if.md diff --git a/docs/zh-CN/sql-reference/sql-functions/conditional-functions/ifnull.md b/docs/zh-CN/sql-manual/sql-functions/conditional-functions/ifnull.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/conditional-functions/ifnull.md rename to docs/zh-CN/sql-manual/sql-functions/conditional-functions/ifnull.md diff --git a/docs/zh-CN/sql-reference/sql-functions/conditional-functions/nullif.md b/docs/zh-CN/sql-manual/sql-functions/conditional-functions/nullif.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/conditional-functions/nullif.md rename to docs/zh-CN/sql-manual/sql-functions/conditional-functions/nullif.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/convert_tz.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/convert_tz.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/convert_tz.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/convert_tz.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/curdate.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/curdate.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/curdate.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/curdate.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/current_timestamp.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/current_timestamp.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/current_timestamp.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/current_timestamp.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/curtime.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/curtime.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/curtime.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/curtime.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/date_add.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/date_add.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/date_add.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/date_add.md diff --git 
a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/date_format.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/date_format.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/date_format.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/date_format.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/date_sub.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/date_sub.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/date_sub.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/date_sub.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/datediff.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/datediff.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/datediff.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/datediff.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/day.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/day.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/day.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/day.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/dayname.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayname.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/dayname.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayname.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/dayofmonth.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayofmonth.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/dayofmonth.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayofmonth.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/dayofweek.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayofweek.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/dayofweek.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayofweek.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/dayofyear.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayofyear.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/dayofyear.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/dayofyear.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/from_days.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/from_days.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/from_days.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/from_days.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/from_unixtime.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/from_unixtime.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/from_unixtime.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/from_unixtime.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/hour.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/hour.md similarity index 100% 
rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/hour.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/hour.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/makedate.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/makedate.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/makedate.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/makedate.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/minute.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/minute.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/minute.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/minute.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/month.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/month.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/month.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/month.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/monthname.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/monthname.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/monthname.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/monthname.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/now.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/now.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/now.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/now.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/second.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/second.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/second.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/second.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/str_to_date.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/str_to_date.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/str_to_date.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/str_to_date.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/time_round.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/time_round.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/time_round.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/time_round.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/timediff.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/timediff.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/timediff.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/timediff.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/timestampadd.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/timestampadd.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/timestampadd.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/timestampadd.md diff --git 
a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/timestampdiff.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/timestampdiff.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/timestampdiff.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/timestampdiff.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/to_date.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/to_date.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/to_date.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/to_date.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/to_days.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/to_days.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/to_days.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/to_days.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/unix_timestamp.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/unix_timestamp.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/unix_timestamp.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/unix_timestamp.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/utc_timestamp.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/utc_timestamp.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/utc_timestamp.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/utc_timestamp.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/week.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/week.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/week.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/week.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/weekday.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/weekday.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/weekday.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/weekday.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/weekofyear.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/weekofyear.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/weekofyear.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/weekofyear.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/year.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/year.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/year.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/year.md diff --git a/docs/zh-CN/sql-reference/sql-functions/date-time-functions/yearweek.md b/docs/zh-CN/sql-manual/sql-functions/date-time-functions/yearweek.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/date-time-functions/yearweek.md rename to docs/zh-CN/sql-manual/sql-functions/date-time-functions/yearweek.md diff --git a/docs/zh-CN/sql-reference/sql-functions/digital-masking.md b/docs/zh-CN/sql-manual/sql-functions/digital-masking.md similarity index 100% rename from 
docs/zh-CN/sql-reference/sql-functions/digital-masking.md rename to docs/zh-CN/sql-manual/sql-functions/digital-masking.md diff --git a/docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/aes.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/aes.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/aes.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/aes.md diff --git a/docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/md5.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/md5.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/md5.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/md5.md diff --git a/docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/md5sum.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/md5sum.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md diff --git a/docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/sm3.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm3.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/sm3.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm3.md diff --git a/docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/sm3sum.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/sm3sum.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md diff --git a/docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/sm4.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm4.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/encrypt-digest-functions/sm4.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm4.md diff --git a/new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/aes.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/aes.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/aes.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/aes.md diff --git a/new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/md5.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/md5.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/md5.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/md5.md diff --git a/new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/md5sum.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/md5sum.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/md5sum.md diff --git a/new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm3.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/sm3.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm3.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/sm3.md diff 
--git a/new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/sm3sum.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm3sum.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/sm3sum.md diff --git a/new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm4.md b/docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/sm4.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-functions/encrypt-digest-functions/sm4.md rename to docs/zh-CN/sql-manual/sql-functions/encrypt-dixgest-functions/sm4.md diff --git a/docs/zh-CN/sql-reference/sql-functions/hash-functions/murmur_hash3_32.md b/docs/zh-CN/sql-manual/sql-functions/hash-functions/murmur_hash3_32.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/hash-functions/murmur_hash3_32.md rename to docs/zh-CN/sql-manual/sql-functions/hash-functions/murmur_hash3_32.md diff --git a/docs/zh-CN/sql-reference/sql-functions/json-functions/get_json_double.md b/docs/zh-CN/sql-manual/sql-functions/json-functions/get_json_double.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/json-functions/get_json_double.md rename to docs/zh-CN/sql-manual/sql-functions/json-functions/get_json_double.md diff --git a/docs/zh-CN/sql-reference/sql-functions/json-functions/get_json_int.md b/docs/zh-CN/sql-manual/sql-functions/json-functions/get_json_int.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/json-functions/get_json_int.md rename to docs/zh-CN/sql-manual/sql-functions/json-functions/get_json_int.md diff --git a/docs/zh-CN/sql-reference/sql-functions/json-functions/get_json_string.md b/docs/zh-CN/sql-manual/sql-functions/json-functions/get_json_string.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/json-functions/get_json_string.md rename to docs/zh-CN/sql-manual/sql-functions/json-functions/get_json_string.md diff --git a/docs/zh-CN/sql-reference/sql-functions/json-functions/json_array.md b/docs/zh-CN/sql-manual/sql-functions/json-functions/json_array.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/json-functions/json_array.md rename to docs/zh-CN/sql-manual/sql-functions/json-functions/json_array.md diff --git a/docs/zh-CN/sql-reference/sql-functions/json-functions/json_object.md b/docs/zh-CN/sql-manual/sql-functions/json-functions/json_object.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/json-functions/json_object.md rename to docs/zh-CN/sql-manual/sql-functions/json-functions/json_object.md diff --git a/docs/zh-CN/sql-reference/sql-functions/json-functions/json_quote.md b/docs/zh-CN/sql-manual/sql-functions/json-functions/json_quote.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/json-functions/json_quote.md rename to docs/zh-CN/sql-manual/sql-functions/json-functions/json_quote.md diff --git a/docs/zh-CN/sql-reference/sql-functions/math-functions/conv.md b/docs/zh-CN/sql-manual/sql-functions/math-functions/conv.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/math-functions/conv.md rename to docs/zh-CN/sql-manual/sql-functions/math-functions/conv.md diff --git a/docs/zh-CN/sql-reference/sql-functions/math-functions/pmod.md b/docs/zh-CN/sql-manual/sql-functions/math-functions/pmod.md similarity index 100% rename from 
docs/zh-CN/sql-reference/sql-functions/math-functions/pmod.md rename to docs/zh-CN/sql-manual/sql-functions/math-functions/pmod.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_astext.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_astext.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_astext.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_astext.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_circle.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_circle.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_circle.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_circle.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_contains.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_contains.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_contains.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_contains.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_distance_sphere.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_distance_sphere.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_distance_sphere.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_geometryfromtext.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_geometryfromtext.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_geometryfromtext.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_linefromtext.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_linefromtext.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_linefromtext.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_linefromtext.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_point.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_point.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_point.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_point.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_polygon.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_polygon.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_polygon.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_polygon.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_x.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_x.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_x.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_x.md diff --git a/docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_y.md b/docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_y.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/spatial-functions/st_y.md rename to docs/zh-CN/sql-manual/sql-functions/spatial-functions/st_y.md diff --git 
a/docs/zh-CN/sql-reference/sql-functions/string-functions/append_trailing_char_if_absent.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/append_trailing_char_if_absent.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/append_trailing_char_if_absent.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/append_trailing_char_if_absent.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/ascii.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/ascii.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/ascii.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/ascii.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/bit_length.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/bit_length.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/bit_length.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/bit_length.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/char_length.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/char_length.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/concat.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/concat.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/concat.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/concat.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/concat_ws.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/concat_ws.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/concat_ws.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/concat_ws.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/ends_with.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/ends_with.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/ends_with.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/ends_with.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/find_in_set.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/find_in_set.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/find_in_set.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/find_in_set.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/hex.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/hex.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/instr.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/instr.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/instr.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/instr.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/lcase.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/lcase.md similarity index 100% rename from 
docs/zh-CN/sql-reference/sql-functions/string-functions/lcase.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/lcase.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/left.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/left.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/left.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/length.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/length.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/length.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/like/like.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/like/like.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/like/like.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/like/like.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/like/not_like.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/like/not_like.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/like/not_like.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/like/not_like.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/locate.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/locate.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/locate.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/locate.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/lower.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/lower.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/lower.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/lower.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/lpad.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/lpad.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/lpad.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/lpad.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/ltrim.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/ltrim.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/ltrim.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/ltrim.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/money_format.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/money_format.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/money_format.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/money_format.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/null_or_empty.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/null_or_empty.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/null_or_empty.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/null_or_empty.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/regexp/not_regexp.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/not_regexp.md similarity 
index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/regexp/not_regexp.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/not_regexp.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/regexp/regexp.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/regexp.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/regexp/regexp.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/regexp.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/regexp/regexp_extract.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/regexp/regexp_extract.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/regexp_extract.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/regexp/regexp_replace.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/regexp_replace.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/regexp/regexp_replace.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/regexp/regexp_replace.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/repeat.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/repeat.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/repeat.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/repeat.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/replace.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/replace.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/replace.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/replace.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/reverse.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/reverse.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/right.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/right.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/right.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/rpad.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/rpad.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/rpad.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/rpad.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/split_part.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/split_part.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/split_part.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/split_part.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/starts_with.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/starts_with.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/starts_with.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/starts_with.md diff --git 
a/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/strleft.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/strleft.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/strright.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/strright.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/substring.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/substring.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/substring.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/substring.md diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md b/docs/zh-CN/sql-manual/sql-functions/string-functions/unhex.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md rename to docs/zh-CN/sql-manual/sql-functions/string-functions/unhex.md diff --git a/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-bitmap.md b/docs/zh-CN/sql-manual/sql-functions/table-functions/explode-bitmap.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/table-functions/explode-bitmap.md rename to docs/zh-CN/sql-manual/sql-functions/table-functions/explode-bitmap.md diff --git a/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-json-array.md b/docs/zh-CN/sql-manual/sql-functions/table-functions/explode-json-array.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/table-functions/explode-json-array.md rename to docs/zh-CN/sql-manual/sql-functions/table-functions/explode-json-array.md diff --git a/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-numbers.md b/docs/zh-CN/sql-manual/sql-functions/table-functions/explode-numbers.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/table-functions/explode-numbers.md rename to docs/zh-CN/sql-manual/sql-functions/table-functions/explode-numbers.md diff --git a/docs/zh-CN/sql-reference/sql-functions/table-functions/explode-split.md b/docs/zh-CN/sql-manual/sql-functions/table-functions/explode-split.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/table-functions/explode-split.md rename to docs/zh-CN/sql-manual/sql-functions/table-functions/explode-split.md diff --git a/docs/zh-CN/sql-reference/sql-functions/table-functions/outer-combinator.md b/docs/zh-CN/sql-manual/sql-functions/table-functions/outer-combinator.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/table-functions/outer-combinator.md rename to docs/zh-CN/sql-manual/sql-functions/table-functions/outer-combinator.md diff --git a/docs/zh-CN/sql-reference/sql-functions/window-function.md b/docs/zh-CN/sql-manual/sql-functions/window-function.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-functions/window-function.md rename to docs/zh-CN/sql-manual/sql-functions/window-function.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md similarity index 100% rename from 
new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-ROLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/CREATE-USER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-ROLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/DROP-USER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/GRANT.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md b/docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/LDAP.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/REVOKE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PASSWORD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md b/docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Account-Management-Statements/SET-PROPERTY.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md similarity 
index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md diff --git 
a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md b/docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-DATABASE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md rename to 
docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/ALTER-VIEW.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md rename to 
docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/BACKUP.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RECOVER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Backup-and-Restore/RESTORE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-DATABASE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md 
b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FILE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-FUNCTION.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-INDEX.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-RESOURCE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md 
b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-TABLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Create/CREATE-VIEW.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-DATABASE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FILE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-FUNCTION.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-INDEX.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md similarity index 100% rename from 
new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-RESOURCE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/DROP-TABLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/BROKER-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CANCEL-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md rename to 
docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/MULTI-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md rename to 
docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STOP-SYNC-JOB.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Load/STREAM-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/DELETE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/INSERT.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/Manipulation/UPDATE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Manipulation-Statements/OUTFILE.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/BIGINT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/BIGINT.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/BIGINT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/BIGINT.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/BITMAP.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/BITMAP.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/BITMAP.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/BITMAP.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/BOOLEAN.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/BOOLEAN.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/BOOLEAN.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/BOOLEAN.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/CHAR.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/CHAR.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/CHAR.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/CHAR.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/DATE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DATE.md similarity index 100% rename from 
docs/zh-CN/sql-reference/sql-statements/Data Types/DATE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DATE.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/DATETIME.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DATETIME.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/DATETIME.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DATETIME.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/DECIMAL.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DECIMAL.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/DECIMAL.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DECIMAL.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/DOUBLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DOUBLE.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/DOUBLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/DOUBLE.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/FLOAT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/FLOAT.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/FLOAT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/FLOAT.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/HLL.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/HLL.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/HLL.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/HLL.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/INT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/INT.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/INT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/INT.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/LARGEINT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/LARGEINT.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/LARGEINT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/LARGEINT.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/QUANTILE_STATE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/QUANTILE_STATE.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/QUANTILE_STATE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/QUANTILE_STATE.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/SMALLINT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/SMALLINT.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/SMALLINT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/SMALLINT.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/STRING.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/STRING.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/STRING.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/TINYINT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/TINYINT.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/TINYINT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/TINYINT.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Types/VARCHAR.md 
b/docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/VARCHAR.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Data Types/VARCHAR.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Data-Types/VARCHAR.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CHECK-TABLET.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-CLEAN-TRASH.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-CONFIG.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md 
b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW TABLET STORAGE FORMAT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-TABLET-STORAGE-FORMAT.md similarity index 100% rename from docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW TABLET STORAGE FORMAT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ADMIN-SHOW-TABLET-STORAGE-FORMAT.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ENABLE-FEATURE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ENABLE-FEATURE.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ENABLE-FEATURE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/ENABLE-FEATURE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/INSTALL-PLUGIN.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/KILL.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/RECOVER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/SET-VARIABLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md 
b/docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Database-Administration-Statements/UNINSTALL-PLUGIN.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ALTER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKENDS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BACKUP.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-BROKER.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CHARSET.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CHARSET.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CHARSET.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CHARSET.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLLATION.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLLATION.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLLATION.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLLATION.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-COLUMNS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md rename to 
docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-DATABASE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-FUNCTION.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-CREATE-TABLE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATA.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASE-ID.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DATABASES.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DELETE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-DYNAMIC-PARTITION.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENCRYPT-KEY.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md 
b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ENGINES.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EVENTS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-EXPORT.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FILE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FRONTENDS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-FUNCTIONS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-GRANTS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-INDEX.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LAST-INSERT.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-PROFILE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md 
b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD-WARNINGS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-MIGRATIONS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-OPEN-TABLES.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITION-ID.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PARTITIONS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PLUGINS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROC.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCEDURE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCEDURE.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCEDURE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCEDURE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROCESSLIST.md diff --git 
a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-PROPERTY.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-QUERY-PROFILE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-QUERY-PROFILE.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-QUERY-PROFILE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-QUERY-PROFILE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-REPOSITORIES.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESOURCES.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-RESTORE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLES.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLLUP.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLLUP.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLLUP.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROLLUP.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-ROUTINE-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md rename to 
docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SMALL-FILES.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SNAPSHOT.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SQL-BLOCK-RULE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STATUS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-STREAM-LOAD.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-SYNC-JOB.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-ID.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLE-STATUS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TABLET.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRANSACTION.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md similarity index 100% rename from 
new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRASH.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRIGGERS.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRIGGERS.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRIGGERS.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-TRIGGERS.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-USER.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VARIABLES.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-VIEW.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-WARNING.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-WARNING.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-WARNING.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-WARNING.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-WHITE-LIST.md b/docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-WHITE-LIST.md similarity index 100% rename from docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-WHITE-LIST.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Show-Statements/SHOW-WHITE-LIST.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/DESCRIBE.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/HELP.md b/docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/HELP.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/HELP.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/HELP.md diff --git a/new-docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/USE.md b/docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/USE.md similarity index 100% rename from new-docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/USE.md rename to docs/zh-CN/sql-manual/sql-reference-v2/Utility-Statements/USE.md diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md deleted file mode 100644 index 3d971ef96a..0000000000 --- 
a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/ALTER-USER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-USER", - "language": "zh-CN" -} ---- - - - -## ALTER-USER - -### Description - -### Example - -### Keywords - - ALTER, USER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md deleted file mode 100644 index e112eae810..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-ROLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-ROLE", - "language": "zh-CN" -} ---- - - - -## CREATE-ROLE - -### Description - -### Example - -### Keywords - - CREATE, ROLE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md deleted file mode 100644 index 2890278832..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/CREATE-USER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-USER", - "language": "zh-CN" -} ---- - - - -## CREATE-USER - -### Description - -### Example - -### Keywords - - CREATE, USER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md deleted file mode 100644 index 1fda115022..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-ROLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-ROLE", - "language": "zh-CN" -} ---- - - - -## DROP-ROLE - -### Description - -### Example - -### Keywords - - DROP, ROLE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md deleted file mode 100644 index fca958052f..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/DROP-USER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-USER", - "language": "zh-CN" -} ---- - - - -## DROP-USER - -### Description - -### Example - -### Keywords - - DROP, USER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md deleted file mode 100644 index 4a7476833a..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/GRANT.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "GRANT", - "language": "zh-CN" -} ---- - - - -## GRANT - -### Description - -### Example - -### Keywords - - GRANT - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md deleted file mode 100644 index a276439443..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/LDAP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "LDAP", - "language": "zh-CN" -} ---- - - - -## LDAP - -### Description - -### Example - -### Keywords - - LDAP - -### Best Practice - diff --git 
a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md deleted file mode 100644 index 2fe3441b9f..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/REVOKE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "REVOKE", - "language": "zh-CN" -} ---- - - - -## REVOKE - -### Description - -### Example - -### Keywords - - REVOKE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md deleted file mode 100644 index e7cd62702e..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PASSWORD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SET-PASSWORD", - "language": "zh-CN" -} ---- - - - -## SET-PASSWORD - -### Description - -### Example - -### Keywords - - SET, PASSWORD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md b/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md deleted file mode 100644 index beb3092ab6..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Account-Management-Statements/SET-PROPERTY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SET-PROPERTY", - "language": "zh-CN" -} ---- - - - -## SET-PROPERTY - -### Description - -### Example - -### Keywords - - SET, PROPERTY - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md b/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md deleted file mode 100644 index 0206709fe1..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-ADD-BACKEND", - "language": "zh-CN" -} ---- - - - -## ALTER-SYSTEM-ADD-BACKEND - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, ADD, BACKEND - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md deleted file mode 100644 index 245e8e30ef..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-ADD-FOLLOWER", - "language": "zh-CN" -} ---- - - - -## ALTER-SYSTEM-ADD-FOLLOWER - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, ADD, FOLLOWER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md deleted file mode 100644 index 845a5f2cce..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-ADD-OBSERVER", - "language": "zh-CN" -} ---- - - - -## ALTER-SYSTEM-ADD-OBSERVER - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, ADD, OBSERVER - -### Best Practice - diff --git 
a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md b/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md deleted file mode 100644 index 234fea62e9..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-DECOMMISSION-BACKEND", - "language": "zh-CN" -} ---- - - - -## ALTER-SYSTEM-DECOMMISSION-BACKEND - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, DECOMMISSION, BACKEND - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md b/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md deleted file mode 100644 index 43b19d419a..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-DROP-BACKEND", - "language": "zh-CN" -} ---- - - - -## ALTER-SYSTEM-DROP-BACKEND - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, DROP, BACKEND - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md deleted file mode 100644 index 205056b01e..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-DROP-FOLLOWER", - "language": "zh-CN" -} ---- - - - -## ALTER-SYSTEM-DROP-FOLLOWER - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, DROP, FOLLOWER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md deleted file mode 100644 index 3c4510b104..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-SYSTEM-DROP-OBSERVER", - "language": "zh-CN" -} ---- - - - -## ALTER-SYSTEM-DROP-OBSERVER - -### Description - -### Example - -### Keywords - - ALTER, SYSTEM, DROP, OBSERVER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md b/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md deleted file mode 100644 index b7c0e6bd51..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-ALTER-SYSTEM", - "language": "zh-CN" -} ---- - - - -## CANCEL-ALTER-SYSTEM - -### Description - -### Example - -### Keywords - - CANCEL, ALTER, SYSTEM - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md deleted file mode 100644 index 6a96c5914d..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-DATABASE.md +++ /dev/null @@ -1,38 
+0,0 @@ ---- -{ - "title": "ALTER-DATABASE", - "language": "zh-CN" -} ---- - - - -## ALTER-DATABASE - -### Description - -### Example - -### Keywords - - ALTER, DATABASE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md deleted file mode 100644 index a4aaa9eabb..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-COLUMN", - "language": "zh-CN" -} ---- - - - -## ALTER-TABLE-COLUMN - -### Description - -### Example - -### Keywords - - ALTER, TABLE, COLUMN - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md deleted file mode 100644 index 67b9408154..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-PARTITION", - "language": "zh-CN" -} ---- - - - -## ALTER-TABLE-PARTITION - -### Description - -### Example - -### Keywords - - ALTER, TABLE, PARTITION - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md deleted file mode 100644 index c9ed837e2f..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-PROPERTY", - "language": "zh-CN" -} ---- - - - -## ALTER-TABLE-PROPERTY - -### Description - -### Example - -### Keywords - - ALTER, TABLE, PROPERTY - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md deleted file mode 100644 index 9a02821546..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-RENAME", - "language": "zh-CN" -} ---- - - - -## ALTER-TABLE-RENAME - -### Description - -### Example - -### Keywords - - ALTER, TABLE, RENAME - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md deleted file mode 100644 index 1db4b7c4a1..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-REPLACE", - "language": "zh-CN" -} ---- - - - -## ALTER-TABLE-REPLACE - -### Description - -### Example - -### Keywords - - ALTER, TABLE, REPLACE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md deleted file mode 100644 index 9a88a09ab9..0000000000 --- 
a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-TABLE-ROLLUP", - "language": "zh-CN" -} ---- - - - -## ALTER-TABLE-ROLLUP - -### Description - -### Example - -### Keywords - - ALTER, TABLE, ROLLUP - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md deleted file mode 100644 index 5f9c575d85..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/ALTER-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-VIEW", - "language": "zh-CN" -} ---- - - - -## ALTER-VIEW - -### Description - -### Example - -### Keywords - - ALTER, VIEW - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md deleted file mode 100644 index 7e1767e3bc..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-ALTER-TABLE", - "language": "zh-CN" -} ---- - - - -## CANCEL-ALTER-TABLE - -### Description - -### Example - -### Keywords - - CANCEL, ALTER, TABLE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md deleted file mode 100644 index bb60b2e67b..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/BACKUP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "BACKUP", - "language": "zh-CN" -} ---- - - - -## BACKUP - -### Description - -### Example - -### Keywords - - BACKUP - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md deleted file mode 100644 index 07e0b1c94d..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-BACKUP", - "language": "zh-CN" -} ---- - - - -## CANCEL-BACKUP - -### Description - -### Example - -### Keywords - - CANCEL, BACKUP - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md deleted file mode 100644 index 37cb5c4585..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-RESTORE", - "language": "zh-CN" -} ---- - - - -## CANCEL-RESTORE - -### Description - -### Example - -### Keywords - - CANCEL, RESTORE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md deleted file mode 100644 index e345171576..0000000000 --- 
a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-REPOSITORY", - "language": "zh-CN" -} ---- - - - -## CREATE-REPOSITORY - -### Description - -### Example - -### Keywords - - CREATE, REPOSITORY - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md deleted file mode 100644 index bb61ff52df..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-REPOSITORY", - "language": "zh-CN" -} ---- - - - -## DROP-REPOSITORY - -### Description - -### Example - -### Keywords - - DROP, REPOSITORY - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md deleted file mode 100644 index 5c7e313e91..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Backup-and-Restore/RESTORE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "RESTORE", - "language": "zh-CN" -} ---- - - - -## RESTORE - -### Description - -### Example - -### Keywords - - RESTORE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md deleted file mode 100644 index dbf4e5dcfd..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-DATABASE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-DATABASE", - "language": "zh-CN" -} ---- - - - -## CREATE-DATABASE - -### Description - -### Example - -### Keywords - - CREATE, DATABASE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md deleted file mode 100644 index 9e03f7a2e9..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-ENCRYPT-KEY", - "language": "zh-CN" -} ---- - - - -## CREATE-ENCRYPT-KEY - -### Description - -### Example - -### Keywords - - CREATE, ENCRYPT, KEY - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md deleted file mode 100644 index a0f9e4ecef..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FILE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-FILE", - "language": "zh-CN" -} ---- - - - -## CREATE-FILE - -### Description - -### Example - -### Keywords - - CREATE, FILE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md deleted file mode 100644 index 7bf226fccb..0000000000 --- 
a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-FUNCTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-FUNCTION", - "language": "zh-CN" -} ---- - - - -## CREATE-FUNCTION - -### Description - -### Example - -### Keywords - - CREATE, FUNCTION - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md deleted file mode 100644 index 330b1b6cfa..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-INDEX.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-INDEX", - "language": "zh-CN" -} ---- - - - -## CREATE-INDEX - -### Description - -### Example - -### Keywords - - CREATE, INDEX - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md deleted file mode 100644 index 3aa77450b0..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-MATERIALIZED-VIEW", - "language": "zh-CN" -} ---- - - - -## CREATE-MATERIALIZED-VIEW - -### Description - -### Example - -### Keywords - - CREATE, MATERIALIZED, VIEW - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md deleted file mode 100644 index 22567fece1..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-RESOURCE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-RESOURCE", - "language": "zh-CN" -} ---- - - - -## CREATE-RESOURCE - -### Description - -### Example - -### Keywords - - CREATE, RESOURCE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md deleted file mode 100644 index fe5ed64e4d..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE-LIKE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-TABLE-LIKE", - "language": "zh-CN" -} ---- - - - -## CREATE-TABLE-LIKE - -### Description - -### Example - -### Keywords - - CREATE, TABLE, LIKE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md deleted file mode 100644 index 1b0229c024..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-TABLE.md +++ /dev/null @@ -1,569 +0,0 @@ ---- -{ - "title": "CREATE-TABLE", - "language": "zh-CN" -} ---- - - - -## CREATE-TABLE - -### Description - -该命令用于创建一张表。本文档主语介绍创建 Doris 自维护的表的语法。外部表语法请参阅 [CREATE-EXTERNAL-TABLE] 文档。 - -```sql -CREATE TABLE [IF NOT EXISTS] [database.]table -( - column_definition_list, - [index_definition_list] -) -[engine_type] -[keys_type] -[table_comment] -[partition_info] -distribution_info -[rollup_list] -[properties] -[extra_properties] -``` - -* `column_definition_list` - - 
列定义列表: - - `column_definition[, column_definition]` - - * `column_definition` - - 列定义: - - `column_name column_type [KEY] [aggr_type] [NULL] [default_value] [column_comment]` - - * `column_type` - - 列类型,支持以下类型: - - ``` - TINYINT(1字节) - 范围:-2^7 + 1 ~ 2^7 - 1 - SMALLINT(2字节) - 范围:-2^15 + 1 ~ 2^15 - 1 - INT(4字节) - 范围:-2^31 + 1 ~ 2^31 - 1 - BIGINT(8字节) - 范围:-2^63 + 1 ~ 2^63 - 1 - LARGEINT(16字节) - 范围:-2^127 + 1 ~ 2^127 - 1 - FLOAT(4字节) - 支持科学计数法 - DOUBLE(12字节) - 支持科学计数法 - DECIMAL[(precision, scale)] (16字节) - 保证精度的小数类型。默认是 DECIMAL(10, 0) - precision: 1 ~ 27 - scale: 0 ~ 9 - 其中整数部分为 1 ~ 18 - 不支持科学计数法 - DATE(3字节) - 范围:0000-01-01 ~ 9999-12-31 - DATETIME(8字节) - 范围:0000-01-01 00:00:00 ~ 9999-12-31 23:59:59 - CHAR[(length)] - 定长字符串。长度范围:1 ~ 255。默认为1 - VARCHAR[(length)] - 变长字符串。长度范围:1 ~ 65533。默认为1 - HLL (1~16385个字节) - HyperLogLog 列类型,不需要指定长度和默认值。长度根据数据的聚合程度系统内控制。 - 必须配合 HLL_UNION 聚合类型使用。 - BITMAP - bitmap 列类型,不需要指定长度和默认值。表示整型的集合,元素最大支持到2^64 - 1。 - 必须配合 BITMAP_UNION 聚合类型使用。 - ``` - - * `aggr_type` - - 聚合类型,支持以下聚合类型: - - ``` - SUM:求和。适用数值类型。 - MIN:求最小值。适合数值类型。 - MAX:求最大值。适合数值类型。 - REPLACE:替换。对于维度列相同的行,指标列会按照导入的先后顺序,后导入的替换先导入的。 - REPLACE_IF_NOT_NULL:非空值替换。和 REPLACE 的区别在于对于null值,不做替换。这里要注意的是字段默认值要给NULL,而不能是空字符串,如果是空字符串,会给你替换成空字符串。 - HLL_UNION:HLL 类型的列的聚合方式,通过 HyperLogLog 算法聚合。 - BITMAP_UNION:BITMAP 类型的列的聚合方式,进行位图的并集聚合。 - ``` - - 示例: - - ``` - k1 TINYINT, - k2 DECIMAL(10,2) DEFAULT "10.5", - k4 BIGINT NULL DEFAULT VALUE "1000" COMMENT "This is column k4", - v1 VARCHAR(10) REPLACE NOT NULL, - v2 BITMAP BITMAP_UNION, - v3 HLL HLL_UNION, - v4 INT SUM NOT NULL DEFAULT "1" COMMENT "This is column v4" - ``` - -* `index_definition_list` - - 索引列表定义: - - `index_definition[, index_definition]` - - * `index_definition` - - 索引定义: - - ```sql - INDEX index_name (col_name) [USING BITMAP] COMMENT 'xxxxxx' - ``` - - 示例: - - ```sql - INDEX idx1 (k1) USING BITMAP COMMENT "This is a bitmap index1", - INDEX idx2 (k2) USING BITMAP COMMENT "This is a bitmap index2", - ... - ``` - -* `engine_type` - - 表引擎类型。本文档中类型皆为 OLAP。其他外部表引擎类型见 [CREATE EXTERNAL TABLE](DORIS/SQL手册/语法帮助/DDL/CREATE-EXTERNAL-TABLE.md) 文档。示例: - - `ENGINE=olap` - -* `key_desc` - - 数据模型。 - - `key_type(col1, col2, ...)` - - `key_type` 支持以下模型: - - * DUPLICATE KEY(默认):其后指定的列为排序列。 - * AGGREGATE KEY:其后指定的列为维度列。 - * UNIQUE KEY:其后指定的列为主键列。 - - 示例: - - ``` - DUPLICATE KEY(col1, col2), - AGGREGATE KEY(k1, k2, k3), - UNIQUE KEY(k1, k2) - ``` - -* `table_comment` - - 表注释。示例: - - ``` - COMMENT "This is my first DORIS table" - ``` - -* `partition_desc` - - 分区信息,支持两种写法: - - 1. LESS THAN:仅定义分区上界。下界由上一个分区的上界决定。 - - ``` - PARTITION BY RANGE(col1[, col2, ...]) - ( - PARTITION partition_name1 VALUES LESS THAN MAXVALUE|("value1", "value2", ...), - PARTITION partition_name2 VALUES LESS THAN MAXVALUE|("value1", "value2", ...) - ) - ``` - - 2. 
FIXED RANGE:定义分区的左闭右开区间。 - - ``` - PARTITION BY RANGE(col1[, col2, ...]) - ( - PARTITION partition_name1 VALUES [("k1-lower1", "k2-lower1", "k3-lower1",...), ("k1-upper1", "k2-upper1", "k3-upper1", ...)), - PARTITION partition_name2 VALUES [("k1-lower1-2", "k2-lower1-2", ...), ("k1-upper1-2", MAXVALUE, )) - ) - ``` - -* `distribution_desc` - - 定义数据分桶方式。 - - `DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]` - -* `rollup_list` - - 建表的同时可以创建多个物化视图(ROLLUP)。 - - `ROLLUP (rollup_definition[, rollup_definition, ...])` - - * `rollup_definition` - - `rollup_name (col1[, col2, ...]) [DUPLICATE KEY(col1[, col2, ...])] [PROPERTIES("key" = "value")]` - - 示例: - - ``` - ROLLUP ( - r1 (k1, k3, v1, v2), - r2 (k1, v1) - ) - ``` - -* `properties` - - 设置表属性。目前支持以下属性: - - * `replication_num` - - 副本数。默认副本数为3。如果 BE 节点数量小于3,则需指定副本数小于等于 BE 节点数量。 - - 在 0.15 版本后,该属性将自动转换成 `replication_allocation` 属性,如: - - `"replication_num" = "3"` 会自动转换成 `"replication_allocation" = "tag.location.default:3"` - - * `replication_allocation` - - 根据 Tag 设置副本分布情况。该属性可以完全覆盖 `replication_num` 属性的功能。 - - * `storage_medium/storage_cooldown_time` - - 数据存储介质。`storage_medium` 用于声明表数据的初始存储介质,而 `storage_cooldown_time` 用于设定到期时间。示例: - - ``` - "storage_medium" = "SSD", - "storage_cooldown_time" = "2020-11-20 00:00:00" - ``` - - 这个示例表示数据存放在 SSD 中,并且在 2020-11-20 00:00:00 到期后,会自动迁移到 HDD 存储上。 - - * `colocate_with` - - 当需要使用 Colocation Join 功能时,使用这个参数设置 Colocation Group。 - - `"colocate_with" = "group1"` - - * `bloom_filter_columns` - - 用户指定需要添加 Bloom Filter 索引的列名称列表。各个列的 Bloom Filter 索引是独立的,并不是组合索引。 - - `"bloom_filter_columns" = "k1, k2, k3"` - - * `in_memory` - - 通过此属性设置该表是否为 [内存表](DORIS/操作手册/内存表.md)。 - - `"in_memory" = "true"` - - * `function_column.sequence_type` - - 当使用 UNIQUE KEY 模型时,可以指定一个sequence列,当KEY列相同时,将按照 sequence 列进行 REPLACE(较大值替换较小值,否则无法替换) - - 这里我们仅需指定顺序列的类型,支持时间类型或整型。Doris 会创建一个隐藏的顺序列。 - - `"function_column.sequence_type" = 'Date'` - - * 动态分区相关 - - 动态分区相关参数如下: - - * `dynamic_partition.enable`: 用于指定表级别的动态分区功能是否开启。默认为 true。 - * `dynamic_partition.time_unit`: 用于指定动态添加分区的时间单位,可选择为DAY(天),WEEK(周),MONTH(月),HOUR(时)。 - * `dynamic_partition.start`: 用于指定向前删除多少个分区。值必须小于0。默认为 Integer.MIN_VALUE。 - * `dynamic_partition.end`: 用于指定提前创建的分区数量。值必须大于0。 - * `dynamic_partition.prefix`: 用于指定创建的分区名前缀,例如分区名前缀为p,则自动创建分区名为p20200108。 - * `dynamic_partition.buckets`: 用于指定自动创建的分区分桶数量。 - * `dynamic_partition.create_history_partition`: 是否创建历史分区。 - * `dynamic_partition.history_partition_num`: 指定创建历史分区的数量。 - * `dynamic_partition.reserved_history_periods`: 用于指定保留的历史分区的时间段。 - - * 数据排序相关 - - 数据排序相关参数如下: - - * `data_sort.sort_type`: 数据排序使用的方法,目前支持两种:lexical/z-order,默认是lexical - * `data_sort.col_num`: 数据排序使用的列数,取最前面几列,不能超过总的key 列数 -### Example - -1. 创建一个明细模型的表 - - ```sql - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - k3 CHAR(10) COMMENT "string column", - k4 INT NOT NULL DEFAULT "1" COMMENT "int column" - ) - COMMENT "my first table" - DISTRIBUTED BY HASH(k1) BUCKETS 32 - ``` - -2. 创建一个明细模型的表,分区,指定排序列,设置副本数为1 - - ```sql - CREATE TABLE example_db.table_hash - ( - k1 DATE, - k2 DECIMAL(10, 2) DEFAULT "10.5", - k3 CHAR(10) COMMENT "string column", - k4 INT NOT NULL DEFAULT "1" COMMENT "int column" - ) - DUPLICATE KEY(k1, k2) - COMMENT "my first table" - PARTITION BY RANGE(k1) - ( - PARTITION p1 VALUES LESS THAN ("2020-02-01"), - PARTITION p2 VALUES LESS THAN ("2020-03-01"), - PARTITION p3 VALUES LESS THAN ("2020-04-01") - ) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ( - "replication_num" = "1" - ); - ``` - -3. 
创建一个主键唯一模型的表,设置初始存储介质和冷却时间 - - ```sql - CREATE TABLE example_db.table_hash - ( - k1 BIGINT, - k2 LARGEINT, - v1 VARCHAR(2048) REPLACE, - v2 SMALLINT SUM DEFAULT "10" - ) - UNIQUE KEY(k1, k2) - DISTRIBUTED BY HASH (k1, k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", - "storage_cooldown_time" = "2015-06-04 00:00:00" - ); - ``` - -4. 创建一个聚合模型表,使用固定范围分区描述 - - ```sql - CREATE TABLE table_range - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048) REPLACE, - v2 INT SUM DEFAULT "1" - ) - AGGREGATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1, k2, k3) - ( - PARTITION p1 VALUES [("2014-01-01", "10", "200"), ("2014-01-01", "20", "300")), - PARTITION p2 VALUES [("2014-06-01", "100", "200"), ("2014-07-01", "100", "300")) - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - ``` - -5. 创建一个包含 HLL 和 BITMAP 列类型的聚合模型表 - - ```sql - CREATE TABLE example_db.example_table - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 HLL HLL_UNION, - v2 BITMAP BITMAP_UNION - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - ``` - -6. 创建两张同一个 Colocation Group 自维护的表。 - - ```sql - CREATE TABLE t1 ( - id int(11) COMMENT "", - value varchar(8) COMMENT "" - ) - DUPLICATE KEY(id) - DISTRIBUTED BY HASH(id) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "group1" - ); - - CREATE TABLE t2 ( - id int(11) COMMENT "", - value1 varchar(8) COMMENT "", - value2 varchar(8) COMMENT "" - ) - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "group1" - ); - ``` - -7. 创建一个带有 bitmap 索引以及 bloom filter 索引的内存表 - - ```sql - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 CHAR(10) REPLACE, - v2 INT SUM, - INDEX k1_idx (k1) USING BITMAP COMMENT 'my first index' - ) - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ( - "bloom_filter_columns" = "k2", - "in_memory" = "true" - ); - ``` - -8. 创建一个动态分区表。 - - 该表每天提前创建3天的分区,并删除3天前的分区。例如今天为`2020-01-08`,则会创建分区名为`p20200108`, `p20200109`, `p20200110`, `p20200111`的分区. 分区范围分别为: - - ``` - [types: [DATE]; keys: [2020-01-08]; ‥types: [DATE]; keys: [2020-01-09]; ) - [types: [DATE]; keys: [2020-01-09]; ‥types: [DATE]; keys: [2020-01-10]; ) - [types: [DATE]; keys: [2020-01-10]; ‥types: [DATE]; keys: [2020-01-11]; ) - [types: [DATE]; keys: [2020-01-11]; ‥types: [DATE]; keys: [2020-01-12]; ) - ``` - - ```sql - CREATE TABLE example_db.dynamic_partition - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - DUPLICATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1) () - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-3", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32" - ); - ``` - -9. 创建一个带有物化视图(ROLLUP)的表。 - - ```sql - CREATE TABLE example_db.rolup_index_table - ( - event_day DATE, - siteid INT DEFAULT '10', - citycode SMALLINT, - username VARCHAR(32) DEFAULT '', - pv BIGINT SUM DEFAULT '0' - ) - AGGREGATE KEY(event_day, siteid, citycode, username) - DISTRIBUTED BY HASH(siteid) BUCKETS 10 - ROLLUP ( - r1(event_day,siteid), - r2(event_day,citycode), - r3(event_day) - ) - PROPERTIES("replication_num" = "3"); - ``` - -10. 
通过 `replication_allocation` 属性设置表的副本。 - - ```sql - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5" - ) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ( - "replication_allocation"="tag.location.group_a:1, tag.location.group_b:2" - ); - - - CREATE TABLE example_db.dynamic_partition - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - PARTITION BY RANGE (k1) () - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-3", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32", - "dynamic_partition.replication_allocation" = "tag.location.group_a:3" - ); - ``` - -### Keywords - - CREATE, TABLE - -### Best Practice - -#### 分区和分桶 - -一个表必须指定分桶列,但可以不指定分区。关于分区和分桶的具体介绍,可参阅 [数据划分](DORIS/开始使用/关系模型与数据划分.md) 文档。 - -Doris 中的表可以分为分区表和无分区的表。这个属性在建表时确定,之后不可更改。即对于分区表,可以在之后的使用过程中对分区进行增删操作,而对于无分区的表,之后不能再进行增加分区等操作。 - -同时,分区列和分桶列在表创建之后不可更改,既不能更改分区和分桶列的类型,也不能对这些列进行任何增删操作。 - -所以建议在建表前,先确认使用方式来进行合理的建表。 - -#### 动态分区 - -动态分区功能主要用于帮助用户自动的管理分区。通过设定一定的规则,Doris 系统定期增加新的分区或删除历史分区。可参阅 [动态分区](DORIS/操作手册/动态分区.md) 文档查看更多帮助。 - -#### 物化视图 - -用户可以在建表的同时创建多个物化视图(ROLLUP)。物化视图也可以在建表之后添加。写在建表语句中可以方便用户一次性创建所有物化视图。 - -如果在建表时创建好物化视图,则后续的所有数据导入操作都会同步生成物化视图的数据。物化视图的数量可能会影响数据导入的效率。 - -如果在之后的使用过程中添加物化视图,如果表中已有数据,则物化视图的创建时间取决于当前数据量大小。 - -关于物化视图的介绍,请参阅文档 [物化视图](DORIS/操作手册/物化视图.md)。 - -#### 索引 - -用户可以在建表的同时创建多个列的索引。索引也可以在建表之后再添加。 - -如果在之后的使用过程中添加索引,如果表中已有数据,则需要重写所有数据,因此索引的创建时间取决于当前数据量。 - -#### 内存表 - -当建表时指定了 `"in_memory" = "true"` 属性,则 Doris 会尽量将该表的数据块缓存在存储引擎的 PageCache 中,以减少磁盘IO。但这个属性不会保证数据块常驻在内存中,仅作为一种尽力而为的标识。 diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md deleted file mode 100644 index b851ff08ab..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Create/CREATE-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-VIEW", - "language": "zh-CN" -} ---- - - - -## CREATE-VIEW - -### Description - -### Example - -### Keywords - - CREATE, VIEW - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md deleted file mode 100644 index 7b9ba22abb..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-DATABASE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-DATABASE", - "language": "zh-CN" -} ---- - - - -## DROP-DATABASE - -### Description - -### Example - -### Keywords - - DROP, DATABASE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md deleted file mode 100644 index 43e0e70c4c..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-ENCRYPT-KEY", - "language": "zh-CN" -} ---- - - - -## DROP-ENCRYPT-KEY - -### Description - -### Example - -### Keywords - - DROP, ENCRYPT, KEY - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md
b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md deleted file mode 100644 index d725b905a5..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FILE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-FILE", - "language": "zh-CN" -} ---- - - - -## DROP-FILE - -### Description - -### Example - -### Keywords - - DROP, FILE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md deleted file mode 100644 index 9aa8f7d2e4..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-FUNCTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-FUNCTION", - "language": "zh-CN" -} ---- - - - -## DROP-FUNCTION - -### Description - -### Example - -### Keywords - - DROP, FUNCTION - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md deleted file mode 100644 index a19f946d75..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-INDEX.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-INDEX", - "language": "zh-CN" -} ---- - - - -## DROP-INDEX - -### Description - -### Example - -### Keywords - - DROP, INDEX - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md deleted file mode 100644 index 349bf28c63..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-MATERIALIZED-VIEW", - "language": "zh-CN" -} ---- - - - -## DROP-MATERIALIZED-VIEW - -### Description - -### Example - -### Keywords - - DROP, MATERIALIZED, VIEW - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md deleted file mode 100644 index 52031fb807..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-RESOURCE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-RESOURCE", - "language": "zh-CN" -} ---- - - - -## DROP-RESOURCE - -### Description - -### Example - -### Keywords - - DROP, RESOURCE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md deleted file mode 100644 index b4ff962400..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/DROP-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DROP-TABLE", - "language": "zh-CN" -} ---- - - - -## DROP-TABLE - -### Description - -### Example - -### Keywords - - DROP, TABLE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md deleted file mode 100644 index 3fcd88cce7..0000000000 --- 
a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Definition-Statements/Drop/TRUNCATE-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "TRUNCATE-TABLE", - "language": "zh-CN" -} ---- - - - -## TRUNCATE-TABLE - -### Description - -### Example - -### Keywords - - TRUNCATE, TABLE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md deleted file mode 100644 index 11c410f4cb..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ALTER-ROUTINE-LOAD", - "language": "zh-CN" -} ---- - - - -## ALTER-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - ALTER, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md deleted file mode 100644 index 1b253e7190..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/BROKER-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "BROKER-LOAD", - "language": "zh-CN" -} ---- - - - -## BROKER-LOAD - -### Description - -### Example - -### Keywords - - BROKER, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md deleted file mode 100644 index 4a8adeae6a..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CANCEL-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CANCEL-LOAD", - "language": "zh-CN" -} ---- - - - -## CANCEL-LOAD - -### Description - -### Example - -### Keywords - - CANCEL, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md deleted file mode 100644 index 9078ca38c8..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "CREATE-ROUTINE-LOAD", - "language": "zh-CN" -} ---- - - - -## CREATE-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - CREATE, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md deleted file mode 100644 index 9227a68af3..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "PAUSE-ROUTINE-LOAD", - "language": "zh-CN" -} ---- - - - -## PAUSE-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - PAUSE, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md deleted file mode 100644 index 2032251da6..0000000000 --- 
a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "RESUME-ROUTINE-LOAD", - "language": "zh-CN" -} ---- - - - -## RESUME-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - RESUME, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md deleted file mode 100644 index b31bded5a2..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "STOP-ROUTINE-LOAD", - "language": "zh-CN" -} ---- - - - -## STOP-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - STOP, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md deleted file mode 100644 index adba8b370e..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Load/STREAM-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "STREAM-LOAD", - "language": "zh-CN" -} ---- - - - -## STREAM-LOAD - -### Description - -### Example - -### Keywords - - STREAM, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md deleted file mode 100644 index 6831410090..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/DELETE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DELETE", - "language": "zh-CN" -} ---- - - - -## DELETE - -### Description - -### Example - -### Keywords - - DELETE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md deleted file mode 100644 index 535ed628c2..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/INSERT.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "INSERT", - "language": "zh-CN" -} ---- - - - -## INSERT - -### Description - -### Example - -### Keywords - - INSERT - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md deleted file mode 100644 index 688275b4bb..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Data-Manipulation-Statements/Manipulation/UPDATE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "UPDATE", - "language": "zh-CN" -} ---- - - - -## UPDATE - -### Description - -### Example - -### Keywords - - UPDATE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md deleted file mode 100644 index 6f232ed10a..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CANCEL-REPAIR.md +++ /dev/null @@ -1,38 +0,0 @@ ---- 
-{ - "title": "ADMIN-CANCEL-REPAIR", - "language": "zh-CN" -} ---- - - - -## ADMIN-CANCEL-REPAIR - -### Description - -### Example - -### Keywords - - ADMIN, CANCEL, REPAIR - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md deleted file mode 100644 index 18872cca47..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-CHECK-TABLET.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-CHECK-TABLET", - "language": "zh-CN" -} ---- - - - -## ADMIN-CHECK-TABLET - -### Description - -### Example - -### Keywords - - ADMIN, CHECK, TABLET - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md deleted file mode 100644 index cb39d0cd7d..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-REPAIR-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-REPAIR-TABLE", - "language": "zh-CN" -} ---- - - - -## ADMIN-REPAIR-TABLE - -### Description - -### Example - -### Keywords - - ADMIN, REPAIR, TABLE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md deleted file mode 100644 index 369a81a0f0..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-CONFIG.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SET-CONFIG", - "language": "zh-CN" -} ---- - - - -## ADMIN-SET-CONFIG - -### Description - -### Example - -### Keywords - - ADMIN, SET, CONFIG - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md deleted file mode 100644 index 4f7a4fefad..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SET-REPLICA-STATUS", - "language": "zh-CN" -} ---- - - - -## ADMIN-SET-REPLICA-STATUS - -### Description - -### Example - -### Keywords - - ADMIN, SET, REPLICA, STATUS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md deleted file mode 100644 index 57a7ce40f2..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-CONFIG.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SHOW-CONFIG", - "language": "zh-CN" -} ---- - - - -## ADMIN-SHOW-CONFIG - -### Description - -### Example - -### Keywords - - ADMIN, SHOW, CONFIG - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md deleted file mode 100644 index 684c4d9307..0000000000 --- 
a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SHOW-REPLICA-DISTRIBUTION", - "language": "zh-CN" -} ---- - - - -## ADMIN-SHOW-REPLICA-DISTRIBUTION - -### Description - -### Example - -### Keywords - - ADMIN, SHOW, REPLICA, DISTRIBUTION - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md deleted file mode 100644 index 69b40e2159..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "ADMIN-SHOW-REPLICA-STATUS", - "language": "zh-CN" -} ---- - - - -## ADMIN-SHOW-REPLICA-STATUS - -### Description - -### Example - -### Keywords - - ADMIN, SHOW, REPLICA, STATUS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md deleted file mode 100644 index 5c8db00138..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/INSTALL-PLUGIN.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "INSTALL-PLUGIN", - "language": "zh-CN" -} ---- - - - -## INSTALL-PLUGIN - -### Description - -### Example - -### Keywords - - INSTALL, PLUGIN - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md deleted file mode 100644 index 60dc6bffeb..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/KILL.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "KILL", - "language": "zh-CN" -} ---- - - - -## KILL - -### Description - -### Example - -### Keywords - - KILL - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md deleted file mode 100644 index 3057951ab6..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/RECOVER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "RECOVER", - "language": "zh-CN" -} ---- - - - -## RECOVER - -### Description - -### Example - -### Keywords - - RECOVER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md deleted file mode 100644 index 1df8b4c33e..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/SET-VARIABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SET-VARIABLE", - "language": "zh-CN" -} ---- - - - -## SET-VARIABLE - -### Description - -### Example - -### Keywords - - SET, VARIABLE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md b/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md deleted file mode 100644 index 7508345891..0000000000 --- 
a/docs/zh-CN/sql-reference-v2/sql-statements/Database-Administration-Statements/UNINSTALL-PLUGIN.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "UNINSTALL-PLUGIN", - "language": "zh-CN" -} ---- - - - -## UNINSTALL-PLUGIN - -### Description - -### Example - -### Keywords - - UNINSTALL, PLUGIN - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md deleted file mode 100644 index d4cd8586be..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ALTER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ALTER", - "language": "zh-CN" -} ---- - - - -## SHOW-ALTER - -### Description - -### Example - -### Keywords - - SHOW, ALTER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md deleted file mode 100644 index b534f590c7..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKENDS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-BACKENDS", - "language": "zh-CN" -} ---- - - - -## SHOW-BACKENDS - -### Description - -### Example - -### Keywords - - SHOW, BACKENDS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md deleted file mode 100644 index 65e880064a..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BACKUP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-BACKUP", - "language": "zh-CN" -} ---- - - - -## SHOW-BACKUP - -### Description - -### Example - -### Keywords - - SHOW, BACKUP - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md deleted file mode 100644 index 6e0ceb9fb9..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-BROKER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-BROKER", - "language": "zh-CN" -} ---- - - - -## SHOW-BROKER - -### Description - -### Example - -### Keywords - - SHOW, BROKER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md deleted file mode 100644 index c026ded599..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-COLUMNS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-COLUMNS", - "language": "zh-CN" -} ---- - - - -## SHOW-COLUMNS - -### Description - -### Example - -### Keywords - - SHOW, COLUMNS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md deleted file mode 100644 index e5eb96bc00..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-DATABASE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-CREATE-DATABASE", - "language": "zh-CN" -} ---- - - - -## SHOW-CREATE-DATABASE - -### Description - -### Example - -### Keywords - - SHOW, CREATE, DATABASE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md 
b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md deleted file mode 100644 index 8c88216e9d..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-FUNCTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-CREATE-FUNCTION", - "language": "zh-CN" -} ---- - - - -## SHOW-CREATE-FUNCTION - -### Description - -### Example - -### Keywords - - SHOW, CREATE, FUNCTION - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md deleted file mode 100644 index 09c4fa3c65..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-CREATE-ROUTINE-LOAD", - "language": "zh-CN" -} ---- - - - -## SHOW-CREATE-ROUTINE-LOAD - -### Description - -### Example - -### Keywords - - SHOW, CREATE, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md deleted file mode 100644 index a34c3df9a2..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-CREATE-TABLE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-CREATE-TABLE", - "language": "zh-CN" -} ---- - - - -## SHOW-CREATE-TABLE - -### Description - -### Example - -### Keywords - - SHOW, CREATE, TABLE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md deleted file mode 100644 index 3b66e59c64..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATA.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DATA", - "language": "zh-CN" -} ---- - - - -## SHOW-DATA - -### Description - -### Example - -### Keywords - - SHOW, DATA - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md deleted file mode 100644 index aa366f471f..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASE-ID.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DATABASE-ID", - "language": "zh-CN" -} ---- - - - -## SHOW-DATABASE-ID - -### Description - -### Example - -### Keywords - - SHOW, DATABASE, ID - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md deleted file mode 100644 index cc921a4ca1..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DATABASES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DATABASES", - "language": "zh-CN" -} ---- - - - -## SHOW-DATABASES - -### Description - -### Example - -### Keywords - - SHOW, DATABASES - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md deleted file mode 100644 index bd915ee405..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DELETE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DELETE", - "language": "zh-CN" -} ---- - - - -## SHOW-DELETE - 
-### Description - -### Example - -### Keywords - - SHOW, DELETE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md deleted file mode 100644 index beaf0c1e03..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-DYNAMIC-PARTITION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-DYNAMIC-PARTITION", - "language": "zh-CN" -} ---- - - - -## SHOW-DYNAMIC-PARTITION - -### Description - -### Example - -### Keywords - - SHOW, DYNAMIC, PARTITION - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md deleted file mode 100644 index 3856951477..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENCRYPT-KEY.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ENCRYPT-KEY", - "language": "zh-CN" -} ---- - - - -## SHOW-ENCRYPT-KEY - -### Description - -### Example - -### Keywords - - SHOW, ENCRYPT, KEY - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENGINES.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENGINES.md deleted file mode 100644 index 3befb4ef45..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ENGINES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ENGINES", - "language": "zh-CN" -} ---- - - - -## SHOW-ENGINES - -### Description - -### Example - -### Keywords - - SHOW, ENGINES - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-EVENTS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-EVENTS.md deleted file mode 100644 index d8827df519..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-EVENTS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-EVENTS", - "language": "zh-CN" -} ---- - - - -## SHOW-EVENTS - -### Description - -### Example - -### Keywords - - SHOW, EVENTS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md deleted file mode 100644 index cdc20263be..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-EXPORT.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-EXPORT", - "language": "zh-CN" -} ---- - - - -## SHOW-EXPORT - -### Description - -### Example - -### Keywords - - SHOW, EXPORT - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md deleted file mode 100644 index 950bc53c9e..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-FRONTENDS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-FRONTENDS", - "language": "zh-CN" -} ---- - - - -## SHOW-FRONTENDS - -### Description - -### Example - -### Keywords - - SHOW, FRONTENDS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md deleted file mode 100644 index c0a8edac07..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-FUNCTIONS.md +++ /dev/null 
@@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-FUNCTIONS", - "language": "zh-CN" -} ---- - - - -## SHOW-FUNCTIONS - -### Description - -### Example - -### Keywords - - SHOW, FUNCTIONS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md deleted file mode 100644 index 74756734ce..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-GRANTS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-GRANTS", - "language": "zh-CN" -} ---- - - - -## SHOW-GRANTS - -### Description - -### Example - -### Keywords - - SHOW, GRANTS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md deleted file mode 100644 index 13c305e379..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-INDEX.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-INDEX", - "language": "zh-CN" -} ---- - - - -## SHOW-INDEX - -### Description - -### Example - -### Keywords - - SHOW, INDEX - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md deleted file mode 100644 index ec893a7f7f..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-PROFILE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-LOAD-PROFILE", - "language": "zh-CN" -} ---- - - - -## SHOW-LOAD-PROFILE - -### Description - -### Example - -### Keywords - - SHOW, LOAD, PROFILE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md deleted file mode 100644 index 5c077c47bb..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD-WARNINGS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-LOAD-WARNINGS", - "language": "zh-CN" -} ---- - - - -## SHOW-LOAD-WARNINGS - -### Description - -### Example - -### Keywords - - SHOW, LOAD, WARNINGS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md deleted file mode 100644 index 99f389b8a1..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-LOAD", - "language": "zh-CN" -} ---- - - - -## SHOW-LOAD - -### Description - -### Example - -### Keywords - - SHOW, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md deleted file mode 100644 index 260e36b2de..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-MIGRATIONS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-MIGRATIONS", - "language": "zh-CN" -} ---- - - - -## SHOW-MIGRATIONS - -### Description - -### Example - -### Keywords - - SHOW, MIGRATIONS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-OPEN-TABLES.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-OPEN-TABLES.md deleted file mode 100644 index 9ca35094e9..0000000000 --- 
a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-OPEN-TABLES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-OPEN-TABLES", - "language": "zh-CN" -} ---- - - - -## SHOW-OPEN-TABLES - -### Description - -### Example - -### Keywords - - SHOW, OPEN, TABLES - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md deleted file mode 100644 index b58e65bf0e..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITION-ID.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PARTITION-ID", - "language": "zh-CN" -} ---- - - - -## SHOW-PARTITION-ID - -### Description - -### Example - -### Keywords - - SHOW, PARTITION, ID - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md deleted file mode 100644 index 7fe2712553..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PARTITIONS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PARTITIONS", - "language": "zh-CN" -} ---- - - - -## SHOW-PARTITIONS - -### Description - -### Example - -### Keywords - - SHOW, PARTITIONS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md deleted file mode 100644 index 2b74d9f1a0..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PLUGINS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PLUGINS", - "language": "zh-CN" -} ---- - - - -## SHOW-PLUGINS - -### Description - -### Example - -### Keywords - - SHOW, PLUGINS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md deleted file mode 100644 index ddc3128650..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROC.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PROC", - "language": "zh-CN" -} ---- - - - -## SHOW-PROC - -### Description - -### Example - -### Keywords - - SHOW, PROC - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md deleted file mode 100644 index e17b4323e0..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROCESSLIST.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-PROCESSLIST", - "language": "zh-CN" -} ---- - - - -## SHOW-PROCESSLIST - -### Description - -### Example - -### Keywords - - SHOW, PROCESSLIST - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md deleted file mode 100644 index bf68a42cea..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-PROPERTY.md +++ /dev/null @@ -1,83 +0,0 @@ ---- -{ - "title": "SHOW-PROPERTY", - "language": "zh-CN" -} ---- - - - -## SHOW-PROPERTY - -### Description - -该语句用于查看用户的属性 - -``` -SHOW PROPERTY [FOR user] [LIKE key]; -``` - -* `user` - - 查看指定用户的属性。如不指定,查看当前用户的。 - -* `LIKE` - - 可以通过属性名模糊匹配。 - -返回结果说明: - -```sql -mysql> show property like '%connection%'; 
-+----------------------+-------+ -| Key | Value | -+----------------------+-------+ -| max_user_connections | 100 | -+----------------------+-------+ -1 row in set (0.01 sec) -``` - -* `Key` - - 属性名。 - -* `Value` - - 属性值。 - -### Example - -1. 查看 jack 用户的属性 - - ```sql - SHOW PROPERTY FOR 'jack'; - ``` - -2. 查看 jack 用户连接数限制属性 - - ```sql - SHOW PROPERTY FOR 'jack' LIKE '%connection%'; - ``` - -### Keywords - - SHOW, PROPERTY - -### Best Practice diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md deleted file mode 100644 index 8f405c921f..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-REPOSITORIES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-REPOSITORIES", - "language": "zh-CN" -} ---- - - - -## SHOW-REPOSITORIES - -### Description - -### Example - -### Keywords - - SHOW, REPOSITORIES - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md deleted file mode 100644 index 47794b543b..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESOURCES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-RESOURCES", - "language": "zh-CN" -} ---- - - - -## SHOW-RESOURCES - -### Description - -### Example - -### Keywords - - SHOW, RESOURCES - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md deleted file mode 100644 index cdf9b2e8b4..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-RESTORE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-RESTORE", - "language": "zh-CN" -} ---- - - - -## SHOW-RESTORE - -### Description - -### Example - -### Keywords - - SHOW, RESTORE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md deleted file mode 100644 index b9229c5c50..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROLES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ROLES", - "language": "zh-CN" -} ---- - - - -## SHOW-ROLES - -### Description - -### Example - -### Keywords - - SHOW, ROLES - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md deleted file mode 100644 index 3370348a0e..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD-TASK.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ROUTINE-LOAD-TASK", - "language": "zh-CN" -} ---- - - - -## SHOW-ROUTINE-LOAD-TASK - -### Description - -### Example - -### Keywords - - SHOW, ROUTINE, LOAD, TASK - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md deleted file mode 100644 index 29347c3241..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-ROUTINE-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-ROUTINE-LOAD", - "language": "zh-CN" -} ---- - - - -## SHOW-ROUTINE-LOAD - -### Description - 
-### Example - -### Keywords - - SHOW, ROUTINE, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md deleted file mode 100644 index eaf3f79b97..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-SMALL-FILES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-SMALL-FILES", - "language": "zh-CN" -} ---- - - - -## SHOW-SMALL-FILES - -### Description - -### Example - -### Keywords - - SHOW, SMALL, FILES - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md deleted file mode 100644 index c82cc9a2b5..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-SNAPSHOT.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-SNAPSHOT", - "language": "zh-CN" -} ---- - - - -## SHOW-SNAPSHOT - -### Description - -### Example - -### Keywords - - SHOW, SNAPSHOT - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md deleted file mode 100644 index 8f14f22e37..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-STATUS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-STATUS", - "language": "zh-CN" -} ---- - - - -## SHOW-STATUS - -### Description - -### Example - -### Keywords - - SHOW, STATUS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md deleted file mode 100644 index 71d32a9dcb..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-STREAM-LOAD.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-STREAM-LOAD", - "language": "zh-CN" -} ---- - - - -## SHOW-STREAM-LOAD - -### Description - -### Example - -### Keywords - - SHOW, STREAM, LOAD - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md deleted file mode 100644 index 10377090f0..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-ID.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-TABLE-ID", - "language": "zh-CN" -} ---- - - - -## SHOW-TABLE-ID - -### Description - -### Example - -### Keywords - - SHOW, TABLE, ID - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md deleted file mode 100644 index 0993f3ba9f..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLE-STATUS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-TABLE-STATUS", - "language": "zh-CN" -} ---- - - - -## SHOW-TABLE-STATUS - -### Description - -### Example - -### Keywords - - SHOW, TABLE, STATUS - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md deleted file mode 100644 index 36e3f93775..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TABLET.md +++ /dev/null @@ -1,38 +0,0 @@ 
---- -{ - "title": "SHOW-TABLET", - "language": "zh-CN" -} ---- - - - -## SHOW-TABLET - -### Description - -### Example - -### Keywords - - SHOW, TABLET - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md deleted file mode 100644 index 9033c6cba9..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-TRANSACTION.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-TRANSACTION", - "language": "zh-CN" -} ---- - - - -## SHOW-TRANSACTION - -### Description - -### Example - -### Keywords - - SHOW, TRANSACTION - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-USER.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-USER.md deleted file mode 100644 index c717dc01b3..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-USER.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-USER", - "language": "zh-CN" -} ---- - - - -## SHOW-USER - -### Description - -### Example - -### Keywords - - SHOW, USER - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md deleted file mode 100644 index f9419ababb..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-VARIABLES.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-VARIABLES", - "language": "zh-CN" -} ---- - - - -## SHOW-VARIABLES - -### Description - -### Example - -### Keywords - - SHOW, VARIABLES - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md b/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md deleted file mode 100644 index 83388cac93..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Show-Statements/SHOW-VIEW.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW-VIEW", - "language": "zh-CN" -} ---- - - - -## SHOW-VIEW - -### Description - -### Example - -### Keywords - - SHOW, VIEW - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md deleted file mode 100644 index 3d887dcc98..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/DESCRIBE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "DESCRIBE", - "language": "zh-CN" -} ---- - - - -## DESCRIBE - -### Description - -### Example - -### Keywords - - DESCRIBE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/HELP.md b/docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/HELP.md deleted file mode 100644 index 20f6e1a369..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/HELP.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "HELP", - "language": "zh-CN" -} ---- - - - -## HELP - -### Description - -### Example - -### Keywords - - HELP - -### Best Practice - diff --git a/docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/USE.md b/docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/USE.md deleted file mode 100644 index 9516afae6f..0000000000 --- a/docs/zh-CN/sql-reference-v2/sql-statements/Utility-Statements/USE.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "USE", - "language": "zh-CN" -} ---- - - - 
-## USE - -### Description - -### Example - -### Keywords - - USE - -### Best Practice - diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/CREATE ROLE.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/CREATE ROLE.md deleted file mode 100644 index f8f4879d1c..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/CREATE ROLE.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "CREATE ROLE", - "language": "zh-CN" -} ---- - - - -# CREATE ROLE -## description - 该语句用于创建一个角色 - - 语法: - CREATE ROLE [IF NOT EXISTS] role1; - - 该语句创建一个无权限的角色,可以后续通过 GRANT 命令赋予该角色权限。 - -## example - - 1. 创建一个角色 - - CREATE ROLE role1; - -## keyword - - CREATE, ROLE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/CREATE USER.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/CREATE USER.md deleted file mode 100644 index 7acb73e05d..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/CREATE USER.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -{ - "title": "CREATE USER", - "language": "zh-CN" -} ---- - - - -# CREATE USER -## description - -Syntax: - - CREATE USER [IF NOT EXISTS] user_identity [IDENTIFIED BY 'password'] [DEFAULT ROLE 'role_name'] - - user_identity: - 'user_name'@'host' - -CREATE USER 命令用于创建一个 Doris 用户。在 Doris 中,一个 user_identity 唯一标识一个用户。user_identity 由两部分组成,user_name 和 host,其中 user_name 为用户名。host 标识用户端连接所在的主机地址。host 部分可以使用 % 进行模糊匹配。如果不指定 host,默认为 '%',即表示该用户可以从任意 host 连接到 Doris。 - -host 部分也可指定为 domain,语法为:'user_name'@['domain'],即使用中括号包围,则 Doris 会认为这个是一个 domain,并尝试解析其 ip 地址。目前仅支持百度内部的 BNS 解析。 - -如果指定了角色(ROLE),则会自动将该角色所拥有的权限赋予新创建的这个用户。如果不指定,则该用户默认没有任何权限。指定的 ROLE 必须已经存在。 - -## example - -1. 创建一个无密码用户(不指定 host,则等价于 jack@'%') - - CREATE USER 'jack'; - -2. 创建一个有密码用户,允许从 '172.10.1.10' 登陆 - - CREATE USER jack@'172.10.1.10' IDENTIFIED BY '123456'; - -3. 为了避免传递明文,用例2也可以使用下面的方式来创建 - - CREATE USER jack@'172.10.1.10' IDENTIFIED BY PASSWORD '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9'; - - 后面加密的内容可以通过PASSWORD()获得到,例如: - - SELECT PASSWORD('123456'); - -4. 创建一个允许从 '192.168' 子网登陆的用户,同时指定其角色为 example_role - - CREATE USER 'jack'@'192.168.%' DEFAULT ROLE 'example_role'; - -5. 创建一个允许从域名 'example_domain' 登陆的用户 - - CREATE USER 'jack'@['example_domain'] IDENTIFIED BY '12345'; - -6. 创建一个用户,并指定一个角色 - - CREATE USER 'jack'@'%' IDENTIFIED BY '12345' DEFAULT ROLE 'my_role'; - -## keyword - - CREATE, USER - diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/DROP ROLE.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/DROP ROLE.md deleted file mode 100644 index d9cf9fca17..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/DROP ROLE.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "DROP ROLE", - "language": "zh-CN" -} ---- - - - -# DROP ROLE -## description - 该语句用于删除一个角色 - - 语法: - DROP ROLE [IF EXISTS] role1; - - 删除一个角色,不会影响之前属于该角色的用户的权限。仅相当于将该角色与用户解耦。用户已经从该角色中获取到的权限,不会改变。 - -## example - - 1. 
删除一个角色 - - DROP ROLE role1; - -## keyword - DROP, ROLE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/DROP USER.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/DROP USER.md deleted file mode 100644 index 43e7fc8b12..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/DROP USER.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -{ - "title": "DROP USER", - "language": "zh-CN" -} ---- - - - -# DROP USER -## description - -Syntax: - - DROP USER [IF EXISTS] 'user_identity' - - `user_identity`: - - user@'host' - user@['domain'] - - 删除指定的 user identitiy. - -## example - -1. 删除用户 jack@'192.%' - - DROP USER 'jack'@'192.%' - -## keyword - - DROP, USER - diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/GRANT.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/GRANT.md deleted file mode 100644 index 7596554122..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/GRANT.md +++ /dev/null @@ -1,110 +0,0 @@ ---- -{ - "title": "GRANT", - "language": "zh-CN" -} ---- - - - -# GRANT -## description - -GRANT 命令用于赋予指定用户或角色指定的权限。 - -Syntax: - - GRANT privilege_list ON db_name[.tbl_name] TO user_identity [ROLE role_name] - - GRANT privilege_list ON RESOURCE resource_name TO user_identity [ROLE role_name] - - -privilege_list 是需要赋予的权限列表,以逗号分隔。当前 Doris 支持如下权限: - - NODE_PRIV:集群节点操作权限,包括节点上下线等操作,只有 root 用户有该权限,不可赋予其他用户。 - ADMIN_PRIV:除 NODE_PRIV 以外的所有权限。 - GRANT_PRIV: 操作权限的权限。包括创建删除用户、角色,授权和撤权,设置密码等。 - SELECT_PRIV:对指定的库或表的读取权限 - LOAD_PRIV:对指定的库或表的导入权限 - ALTER_PRIV:对指定的库或表的schema变更权限 - CREATE_PRIV:对指定的库或表的创建权限 - DROP_PRIV:对指定的库或表的删除权限 - USAGE_PRIV: 对指定资源的使用权限 - - 旧版权限中的 ALL 和 READ_WRITE 会被转换成:SELECT_PRIV,LOAD_PRIV,ALTER_PRIV,CREATE_PRIV,DROP_PRIV; - READ_ONLY 会被转换为 SELECT_PRIV。 - -权限分类: - - 1. 节点权限:NODE_PRIV - 2. 库表权限:SELECT_PRIV,LOAD_PRIV,ALTER_PRIV,CREATE_PRIV,DROP_PRIV - 3. 资源权限:USAGE_PRIV - -db_name[.tbl_name] 支持以下三种形式: - - 1. *.* 权限可以应用于所有库及其中所有表 - 2. db.* 权限可以应用于指定库下的所有表 - 3. db.tbl 权限可以应用于指定库下的指定表 - - 这里指定的库或表可以是不存在的库和表。 - -resource_name 支持以下两种形式: - - 1. * 权限应用于所有资源 - 2. resource 权限应用于指定资源 - - 这里指定的资源可以是不存在的资源。 - -user_identity: - - 这里的 user_identity 语法同 CREATE USER。且必须为使用 CREATE USER 创建过的 user_identity。user_identity 中的host可以是域名,如果是域名的话,权限的生效时间可能会有1分钟左右的延迟。 - - 也可以将权限赋予指定的 ROLE,如果指定的 ROLE 不存在,则会自动创建。 - -## example - - 1. 授予所有库和表的权限给用户 - - GRANT SELECT_PRIV ON *.* TO 'jack'@'%'; - - 2. 授予指定库表的权限给用户 - - GRANT SELECT_PRIV,ALTER_PRIV,LOAD_PRIV ON db1.tbl1 TO 'jack'@'192.8.%'; - - 3. 授予指定库表的权限给角色 - - GRANT LOAD_PRIV ON db1.* TO ROLE 'my_role'; - - 4. 授予所有资源的使用权限给用户 - - GRANT USAGE_PRIV ON RESOURCE * TO 'jack'@'%'; - - 5. 授予指定资源的使用权限给用户 - - GRANT USAGE_PRIV ON RESOURCE 'spark_resource' TO 'jack'@'%'; - - 6. 
授予指定资源的使用权限给角色 - - GRANT USAGE_PRIV ON RESOURCE 'spark_resource' TO ROLE 'my_role'; - -## keyword - - GRANT - diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/REVOKE.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/REVOKE.md deleted file mode 100644 index 4a8168ee7b..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/REVOKE.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -{ - "title": "REVOKE", - "language": "zh-CN" -} ---- - - - -# REVOKE -## description - - REVOKE 命令用于撤销指定用户或角色指定的权限。 - Syntax: - REVOKE privilege_list ON db_name[.tbl_name] FROM user_identity [ROLE role_name] - - REVOKE privilege_list ON RESOURCE resource_name FROM user_identity [ROLE role_name] - - user_identity: - - 这里的 user_identity 语法同 CREATE USER。且必须为使用 CREATE USER 创建过的 user_identity。user_identity 中的host可以是域名,如果是域名的话,权限的撤销时间可能会有1分钟左右的延迟。 - - 也可以撤销指定的 ROLE 的权限,执行的 ROLE 必须存在。 - -## example - - 1. 撤销用户 jack 数据库 testDb 的权限 - - REVOKE SELECT_PRIV ON db1.* FROM 'jack'@'192.%'; - - 1. 撤销用户 jack 资源 spark_resource 的使用权限 - - REVOKE USAGE_PRIV ON RESOURCE 'spark_resource' FROM 'jack'@'192.%'; - -## keyword - - REVOKE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/SET PASSWORD.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/SET PASSWORD.md deleted file mode 100644 index 63ae615868..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/SET PASSWORD.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -{ - "title": "SET PASSWORD", - "language": "zh-CN" -} ---- - - - -# SET PASSWORD -## description - -Syntax: - - SET PASSWORD [FOR user_identity] = - [PASSWORD('plain password')]|['hashed password'] - - SET PASSWORD 命令可以用于修改一个用户的登录密码。如果 [FOR user_identity] 字段不存在,那么修改当前用户的密码。 - - 注意这里的 user_identity 必须完全匹配在使用 CREATE USER 创建用户时指定的 user_identity,否则会报错用户不存在。如果不指定 user_identity,则当前用户为 'username'@'ip',这个当前用户,可能无法匹配任何 user_identity。可以通过 SHOW GRANTS 查看当前用户。 - - PASSWORD() 方式输入的是明文密码; 而直接使用字符串,需要传递的是已加密的密码。 - 如果修改其他用户的密码,需要具有管理员权限。 - -## example - -1. 修改当前用户的密码 - - SET PASSWORD = PASSWORD('123456') - SET PASSWORD = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' - -2. 
修改指定用户密码 - - SET PASSWORD FOR 'jack'@'192.%' = PASSWORD('123456') - SET PASSWORD FOR 'jack'@['domain'] = '*6BB4837EB74329105EE4568DDA7DC67ED2CA2AD9' - -## keyword - SET, PASSWORD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/SET PROPERTY.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/SET PROPERTY.md deleted file mode 100644 index fb9d34ee73..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/SET PROPERTY.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -{ - "title": "SET PROPERTY", - "language": "zh-CN" -} ---- - - - -# SET PROPERTY -## description - - Syntax: - - SET PROPERTY [FOR 'user'] 'key' = 'value' [, 'key' = 'value'] - - 设置用户的属性,包括分配给用户的资源、导入cluster等。这里设置的用户属性,是针对 user 的,而不是 user_identity。即假设通过 CREATE USER 语句创建了两个用户 'jack'@'%' 和 'jack'@'192.%',则使用 SET PROPERTY 语句,只能针对 jack 这个用户,而不是 'jack'@'%' 或 'jack'@'192.%' - - 导入 cluster 仅适用于百度内部用户。 - - key: - - 超级用户权限: - max_user_connections: 最大连接数。 - max_query_instances: 用户同一时间点执行查询可以使用的instance个数。 - sql_block_rules: 设置 sql block rules。设置后,该用户发送的查询如果匹配规则,则会被拒绝。 - cpu_resource_limit: 限制查询的cpu资源。详见会话变量 `cpu_resource_limit` 的介绍。-1 表示未设置。 - exec_mem_limit: 限制查询的内存使用。详见会话变量 `exec_mem_limit` 的介绍。-1 表示未设置。 - load_mem_limit: 限制导入的内存使用。详见会话变量 `load_mem_limit` 的介绍。-1 表示未设置。 - resource.cpu_share: cpu资源分配。(已废弃) - load_cluster.{cluster_name}.priority: 为指定的cluster分配优先级,可以为 HIGH 或 NORMAL - resource_tags:指定用户的资源标签权限。 - - 注:`cpu_resource_limit`, `exec_mem_limit`, `load_mem_limit` 三个属性如果未设置,则默认使用会话变量中值。 - - 普通用户权限: - quota.normal: normal级别的资源分配。 - quota.high: high级别的资源分配。 - quota.low: low级别的资源分配。 - - load_cluster.{cluster_name}.hadoop_palo_path: palo使用的hadoop目录,需要存放etl程序及etl生成的中间数据供palo导入。导入完成后会自动清理中间数据,etl程序自动保留下次使用。 - load_cluster.{cluster_name}.hadoop_configs: hadoop的配置,其中fs.default.name、mapred.job.tracker、hadoop.job.ugi必须填写。 - load_cluster.{cluster_name}.hadoop_http_port: hadoop hdfs name node http端口。其中 hdfs 默认为8070,afs 默认 8010。 - default_load_cluster: 默认的导入cluster。 - -## example - - 1. 修改用户 jack 最大连接数为1000 - SET PROPERTY FOR 'jack' 'max_user_connections' = '1000'; - - 2. 修改用户 jack 的cpu_share为1000 - SET PROPERTY FOR 'jack' 'resource.cpu_share' = '1000'; - - 3. 修改 jack 用户的normal组的权重 - SET PROPERTY FOR 'jack' 'quota.normal' = '400'; - - 4. 为用户 jack 添加导入cluster - SET PROPERTY FOR 'jack' - 'load_cluster.{cluster_name}.hadoop_palo_path' = '/user/palo/palo_path', - 'load_cluster.{cluster_name}.hadoop_configs' = 'fs.default.name=hdfs://dpp.cluster.com:port;mapred.job.tracker=dpp.cluster.com:port;hadoop.job.ugi=user,password;mapred.job.queue.name=job_queue_name_in_hadoop;mapred.job.priority=HIGH;'; - - 5. 删除用户 jack 下的导入cluster。 - SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}' = ''; - - 6. 修改用户 jack 默认的导入cluster - SET PROPERTY FOR 'jack' 'default_load_cluster' = '{cluster_name}'; - - 7. 修改用户 jack 的集群优先级为 HIGH - SET PROPERTY FOR 'jack' 'load_cluster.{cluster_name}.priority' = 'HIGH'; - - 8. 修改用户jack的查询可用instance个数为3000 - SET PROPERTY FOR 'jack' 'max_query_instances' = '3000'; - - 9. 修改用户jack的sql block rule - SET PROPERTY FOR 'jack' 'sql_block_rules' = 'rule1, rule2'; - - 10. 修改用户jack的 cpu 使用限制 - SET PROPERTY FOR 'jack' 'cpu_resource_limit' = '2'; - - 11. 修改用户的资源标签权限 - SET PROPERTY FOR 'jack' 'resource_tags.location' = 'group_a, group_b'; - - 12. 修改用户的查询内存使用限制,单位字节 - SET PROPERTY FOR 'jack' 'exec_mem_limit' = '2147483648'; - - 13. 
修改用户的导入内存使用限制,单位字节 - SET PROPERTY FOR 'jack' 'load_mem_limit' = '2147483648'; - -## keyword - SET, PROPERTY - diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/SHOW GRANTS.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/SHOW GRANTS.md deleted file mode 100644 index df9fa5ae7f..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/SHOW GRANTS.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -{ - "title": "SHOW GRANTS", - "language": "zh-CN" -} ---- - - - -# SHOW GRANTS -## description - - 该语句用于查看用户权限。 - - 语法: - SHOW [ALL] GRANTS [FOR user_identity]; - - 说明: - 1. SHOW ALL GRANTS 可以查看所有用户的权限。 - 2. 如果指定 user_identity,则查看该指定用户的权限。且该 user_identity 必须为通过 CREATE USER 命令创建的。 - 3. 如果不指定 user_identity,则查看当前用户的权限。 - - -## example - - 1. 查看所有用户权限信息 - - SHOW ALL GRANTS; - - 2. 查看指定 user 的权限 - - SHOW GRANTS FOR jack@'%'; - - 3. 查看当前用户的权限 - - SHOW GRANTS; - -## keyword - - SHOW, GRANTS diff --git a/docs/zh-CN/sql-reference/sql-statements/Account Management/SHOW ROLES.md b/docs/zh-CN/sql-reference/sql-statements/Account Management/SHOW ROLES.md deleted file mode 100644 index ddf694812c..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Account Management/SHOW ROLES.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -{ - "title": "SHOW ROLES", - "language": "zh-CN" -} ---- - - - -# SHOW ROLES -## description - 该语句用于展示所有已创建的角色信息,包括角色名称,包含的用户以及权限。 - - 语法: - SHOW ROLES; - -## example - - 1. 查看已创建的角色: - - SHOW ROLES; - -## keyword - SHOW,ROLES - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md deleted file mode 100644 index e6978107c9..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CANCEL REBALANCE DISK.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -{ - "title": "ADMIN CANCEL REBALANCE DISK", - "language": "zh-CN" -} ---- - - - -# ADMIN CANCEL REBALANCE DISK -## description - - 该语句用于取消优先均衡BE的磁盘 - - 语法: - - ADMIN CANCEL REBALANCE DISK [ON ("BackendHost1:BackendHeartBeatPort1", "BackendHost2:BackendHeartBeatPort2", ...)]; - - 说明: - - 1. 该语句仅表示系统不再优先均衡指定BE的磁盘数据。系统仍会以默认调度方式均衡BE的磁盘数据。 - -## example - - 1. 取消集群所有BE的优先磁盘均衡 - - ADMIN CANCEL REBALANCE DISK; - - 2. 取消指定BE的优先磁盘均衡 - - ADMIN CANCEL REBALANCE DISK ON ("192.168.1.1:1234", "192.168.1.2:1234"); - -## keyword - ADMIN,CANCEL,REBALANCE,DISK - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md deleted file mode 100644 index f07cc70222..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CANCEL REPAIR.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -{ - "title": "ADMIN CANCEL REPAIR", - "language": "zh-CN" -} ---- - - - -# ADMIN CANCEL REPAIR -## description - - 该语句用于取消以高优先级修复指定表或分区 - - 语法: - - ADMIN CANCEL REPAIR TABLE table_name[ PARTITION (p1,...)]; - - 说明: - - 1. 该语句仅表示系统不再以高优先级修复指定表或分区的分片副本。系统仍会以默认调度方式修复副本。 - -## example - - 1. 
取消高优先级修复 - - ADMIN CANCEL REPAIR TABLE tbl PARTITION(p1); - -## keyword - ADMIN,CANCEL,REPAIR - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md deleted file mode 100644 index b71eb0dc1e..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CHECK TABLET.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -{ - "title": "ADMIN CHECK TABLET", - "language": "zh-CN" -} ---- - - - -# ADMIN CHECK TABLET -## description - -该语句用于对一组 tablet 执行指定的检查操作 - -语法: - -``` -ADMIN CHECK TABLE (tablet_id1, tablet_id2, ...) -PROPERTIES("type" = "..."); -``` - -说明: - -1. 必须指定 tablet id 列表以及 PROPERTIES 中的 type 属性。 -2. 目前 type 仅支持: - - * consistency: 对tablet的副本数据一致性进行检查。该命令为异步命令,发送后,Doris 会开始执行对应 tablet 的一致性检查作业。最终的结果,将体现在 `SHOW PROC "/statistic";` 结果中的 InconsistentTabletNum 列。 - -## example - -1. 对指定的一组 tablet 进行副本数据一致性检查 - - ``` - ADMIN CHECK TABLET (10000, 10001) - PROPERTIES("type" = "consistency"); - ``` - -## keyword - - ADMIN,CHECK,TABLET diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md deleted file mode 100644 index 04e49bd7e6..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN CLEAN TRASH.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -{ - "title": "ADMIN CLEAN TRASH", - "language": "zh-CN" -} ---- - - - -# ADMIN CLEAN TRASH -## description - 该语句用于清理 backend 内的垃圾数据。 - 语法: - ADMIN CLEAN TRASH [ON ("BackendHost1:BackendHeartBeatPort1", "BackendHost2:BackendHeartBeatPort2", ...)]; - - 说明: - 以 BackendHost:BackendHeartBeatPort 表示需要清理的 backend ,不添加on限定则清理所有 backend 。 - -## example - - 1. 清理所有be节点的垃圾数据。 - - ADMIN CLEAN TRASH; - - 2. 清理'192.168.0.1:9050'和'192.168.0.2:9050'的垃圾数据。 - - ADMIN CLEAN TRASH ON ("192.168.0.1:9050","192.168.0.2:9050"); - -## keyword - ADMIN, CLEAN, TRASH diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN COMPACT.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN COMPACT.md deleted file mode 100644 index 0b1077a98d..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN COMPACT.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -{ - "title": "ADMIN COMPACT", - "language": "zh-CN" -} ---- - - - -# ADMIN COMPACT -## description - - 该语句用于对指定表分区下的所有副本触发一次Compaction - - 语法: - - ADMIN COMPACT TABLE table_name PARTITION partition_name WHERE TYPE='BASE/CUMULATIVE' - - 说明: - - 1. 该语句仅表示让系统尝试将分区下每一个副本的compaction任务提交给compaction线程池,并不保证每一个副本的compaction任务都能成功执行。 - 2. 该语句每次只支持对表下的单个分区执行compaction。 - -## example - - 1. 对指定分区下的所有副本触发一次cumulative compaction - - ADMIN COMPACT TABLE tbl PARTITION par01 WHERE TYPE='CUMULATIVE'; - - 2. 对指定分区下的所有副本触发一次base compaction - - ADMIN COMPACT TABLE tbl PARTITION par01 WHERE TYPE='BASE'; - -## keyword - ADMIN,COMPACT - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md deleted file mode 100644 index 0bb78f5379..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN REBALANCE DISK.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -{ - "title": "ADMIN REBALANCE DISK", - "language": "zh-CN" -} ---- - - - -# ADMIN REBALANCE DISK -## description - - 该语句用于尝试优先均衡指定的BE磁盘数据 - - 语法: - - ADMIN REBALANCE DISK [ON ("BackendHost1:BackendHeartBeatPort1", "BackendHost2:BackendHeartBeatPort2", ...)]; - - 说明: - - 1. 
该语句表示让系统尝试优先均衡指定BE的磁盘数据,不受限于集群是否均衡。 - 2. 默认的 timeout 是 24小时。超时意味着系统将不再优先均衡指定的BE磁盘数据。需要重新使用该命令设置。 - 3. 指定BE的磁盘数据均衡后,该BE的优先级将会失效。 - -## example - - 1. 尝试优先均衡集群内的所有BE - - ADMIN REBALANCE DISK; - - 2. 尝试优先均衡指定BE - - ADMIN REBALANCE DISK ON ("192.168.1.1:1234", "192.168.1.2:1234"); - -## keyword - ADMIN,REBALANCE,DISK - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN REPAIR.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN REPAIR.md deleted file mode 100644 index 851b0fa496..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN REPAIR.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -{ - "title": "ADMIN REPAIR", - "language": "zh-CN" -} ---- - - - -# ADMIN REPAIR -## description - - 该语句用于尝试优先修复指定的表或分区 - - 语法: - - ADMIN REPAIR TABLE table_name[ PARTITION (p1,...)] - - 说明: - - 1. 该语句仅表示让系统尝试以高优先级修复指定表或分区的分片副本,并不保证能够修复成功。用户可以通过 ADMIN SHOW REPLICA STATUS 命令查看修复情况。 - 2. 默认的 timeout 是 14400 秒(4小时)。超时意味着系统将不再以高优先级修复指定表或分区的分片副本。需要重新使用该命令设置。 - -## example - - 1. 尝试修复指定表 - - ADMIN REPAIR TABLE tbl1; - - 2. 尝试修复指定分区 - - ADMIN REPAIR TABLE tbl1 PARTITION (p1, p2); - -## keyword - ADMIN,REPAIR - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md deleted file mode 100644 index a67915a4bb..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SET CONFIG.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "ADMIN SET CONFIG", - "language": "zh-CN" -} ---- - - - -# ADMIN SET CONFIG -## description - - 该语句用于设置集群的配置项(当前仅支持设置FE的配置项)。 - 可设置的配置项,可以通过 ADMIN SHOW FRONTEND CONFIG; 命令查看。 - - 语法: - - ADMIN SET FRONTEND CONFIG ("key" = "value"); - -## example - - 1. 设置 'disable_balance' 为 true - - ADMIN SET FRONTEND CONFIG ("disable_balance" = "true"); - -## keyword - ADMIN,SET,CONFIG diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md deleted file mode 100644 index 71fd90fca4..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SET REPLICA STATUS.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -{ - "title": "ADMIN SET REPLICA STATUS", - "language": "zh-CN" -} ---- - - - -# ADMIN SET REPLICA STATUS -## description - - 该语句用于设置指定副本的状态。 - 该命令目前仅用于手动将某些副本状态设置为 BAD 或 OK,从而使得系统能够自动修复这些副本。 - - 语法: - - ADMIN SET REPLICA STATUS - PROPERTIES ("key" = "value", ...); - - 目前支持如下属性: - "tablet_id":必需。指定一个 Tablet Id. - "backend_id":必需。指定 Backend Id. - "status":必需。指定状态。当前仅支持 "bad" 或 "ok" - - 如果指定的副本不存在,或状态已经是 bad,则会被忽略。 - - 注意: - - 设置为 Bad 状态的副本可能立刻被删除,请谨慎操作。 - -## example - - 1. 设置 tablet 10003 在 BE 10001 上的副本状态为 bad。 - - ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10003", "backend_id" = "10001", "status" = "bad"); - - 2. 
设置 tablet 10003 在 BE 10001 上的副本状态为 ok。 - - ADMIN SET REPLICA STATUS PROPERTIES("tablet_id" = "10003", "backend_id" = "10001", "status" = "ok"); - -## keyword - - ADMIN,SET,REPLICA,STATUS - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md deleted file mode 100644 index be8e4cabcd..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW CONFIG.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -{ - "title": "ADMIN SHOW CONFIG", - "language": "zh-CN" -} ---- - - - -# ADMIN SHOW CONFIG -## description - - 该语句用于展示当前集群的配置(当前仅支持展示 FE 的配置项) - - 语法: - - ADMIN SHOW FRONTEND CONFIG [LIKE "pattern"]; - - 说明: - - 结果中的各列含义如下: - 1. Key: 配置项名称 - 2. Value: 配置项值 - 3. Type: 配置项类型 - 4. IsMutable: 是否可以通过 ADMIN SET CONFIG 命令设置 - 5. MasterOnly: 是否仅适用于 Master FE - 6. Comment: 配置项说明 - -## example - - 1. 查看当前FE节点的配置 - - ADMIN SHOW FRONTEND CONFIG; - - 2. 使用like谓词搜索当前Fe节点的配置 - - mysql> ADMIN SHOW FRONTEND CONFIG LIKE '%check_java_version%'; - +--------------------+-------+---------+-----------+------------+---------+ - | Key | Value | Type | IsMutable | MasterOnly | Comment | - +--------------------+-------+---------+-----------+------------+---------+ - | check_java_version | true | boolean | false | false | | - +--------------------+-------+---------+-----------+------------+---------+ - 1 row in set (0.00 sec) - -## keyword - ADMIN,SHOW,CONFIG diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md deleted file mode 100644 index 6558981a77..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA DISTRIBUTION.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -{ - "title": "ADMIN SHOW REPLICA DISTRIBUTION", - "language": "zh-CN" -} ---- - - - -# ADMIN SHOW REPLICA DISTRIBUTION -## description - - 该语句用于展示一个表或分区副本分布状态 - - 语法: - - ADMIN SHOW REPLICA DISTRIBUTION FROM [db_name.]tbl_name [PARTITION (p1, ...)]; - - 说明: - - 结果中的 Graph 列以图形的形式展示副本分布比例 - -## example - - 1. 查看表的副本分布 - - ADMIN SHOW REPLICA DISTRIBUTION FROM tbl1; - - 2. 查看表的分区的副本分布 - - ADMIN SHOW REPLICA DISTRIBUTION FROM db1.tbl1 PARTITION(p1, p2); - -## keyword - ADMIN,SHOW,REPLICA,DISTRIBUTION - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md deleted file mode 100644 index ccebcd6d5d..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN SHOW REPLICA STATUS.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -{ - "title": "ADMIN SHOW REPLICA STATUS", - "language": "zh-CN" -} ---- - - - -# ADMIN SHOW REPLICA STATUS -## description - - 该语句用于展示一个表或分区的副本状态信息 - - 语法: - - ADMIN SHOW REPLICA STATUS FROM [db_name.]tbl_name [PARTITION (p1, ...)] - [where_clause]; - - where_clause: - WHERE STATUS [!]= "replica_status" - - replica_status: - OK: replica 处于健康状态 - DEAD: replica 所在 Backend 不可用 - VERSION_ERROR: replica 数据版本有缺失 - SCHEMA_ERROR: replica 的 schema hash 不正确 - MISSING: replica 不存在 - -## example - - 1. 查看表全部的副本状态 - - ADMIN SHOW REPLICA STATUS FROM db1.tbl1; - - 2. 查看表某个分区状态为 VERSION_ERROR 的副本 - - ADMIN SHOW REPLICA STATUS FROM tbl1 PARTITION (p1, p2) - WHERE STATUS = "VERSION_ERROR"; - - 3. 
查看表所有状态不健康的副本 - - ADMIN SHOW REPLICA STATUS FROM tbl1 - WHERE STATUS != "OK"; - -## keyword - ADMIN,SHOW,REPLICA,STATUS - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md deleted file mode 100644 index 74d6e6699a..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ADMIN-DIAGNOSE-TABLET.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -{ - "title": "ADMIN DIAGNOSE TABLET", - "language": "zh-CN" -} ---- - - - -# ADMIN DIAGNOSE TABLET -## description - - 该语句用于诊断指定 tablet。结果中将显示这个 tablet 的信息和一些潜在的问题。 - - 语法: - - ADMIN DIAGNOSE TABLET tblet_id - - 说明: - - 结果中的各行信息如下: - 1. TabletExist: Tablet是否存在 - 2. TabletId: Tablet ID - 3. Database: Tablet 所属 DB 和其 ID - 4. Table: Tablet 所属 Table 和其 ID - 5. Partition: Tablet 所属 Partition 和其 ID - 6. MaterializedIndex: Tablet 所属物化视图和其 ID - 7. Replicas(ReplicaId -> BackendId): Tablet 各副本和其所在 BE。 - 8. ReplicasNum: 副本数量是否正确。 - 9. ReplicaBackendStatus: 副本所在 BE 节点是否正常。 - 10.ReplicaVersionStatus: 副本的版本号是否正常。 - 11.ReplicaStatus: 副本状态是否正常。 - 12.ReplicaCompactionStatus: 副本 Compaction 状态是否正常。 - -## example - - 1. 查看 Tablet 10001 的诊断结果 - - ADMIN DIAGNOSE TABLET 10001; - -## keyword - ADMIN,DIAGNOSE,TABLET diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ALTER CLUSTER.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ALTER CLUSTER.md deleted file mode 100644 index dc77914158..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ALTER CLUSTER.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -{ - "title": "ALTER CLUSTER", - "language": "zh-CN" -} ---- - - - -# ALTER CLUSTER -## description - - 该语句用于更新逻辑集群。需要有管理员权限 - - 语法 - - ALTER CLUSTER cluster_name PROPERTIES ("key"="value", ...); - - 1. 缩容,扩容 (根据集群现有的be数目,大则为扩容,小则为缩容), 扩容为同步操作,缩容为异步操作,通过backend的状态可以得知是否缩容完成 - - PROERTIES ("instance_num" = "3") - - instance_num 逻辑集群节点树 - -## example - - 1. 缩容,减少含有3个be的逻辑集群test_cluster的be数为2 - - ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="2"); - - 2. 扩容,增加含有3个be的逻辑集群test_cluster的be数为4 - - ALTER CLUSTER test_cluster PROPERTIES ("instance_num"="4"); - -## keyword - ALTER,CLUSTER - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ALTER SYSTEM.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ALTER SYSTEM.md deleted file mode 100644 index 8af7db553d..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ALTER SYSTEM.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -{ - "title": "ALTER SYSTEM", - "language": "zh-CN" -} ---- - - - -# ALTER SYSTEM -## description - - 该语句用于操作一个系统内的节点。(仅管理员使用!) 
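    A minimal end-to-end sketch may help before the clause-by-clause syntax that follows: it adds a single BE, checks that it registered, and shows the safer way to take the node offline later. The address 192.168.0.10:9050 is a placeholder, not a value taken from this document.

    ```
    -- Add one backend node (admin only); replace host:heartbeat_port with a real address
    ALTER SYSTEM ADD BACKEND "192.168.0.10:9050";

    -- Confirm registration: the node should report Alive = true once heartbeats succeed
    SHOW BACKENDS;

    -- To take the node offline later, prefer DECOMMISSION (safe, asynchronous) over DROP
    ALTER SYSTEM DECOMMISSION BACKEND "192.168.0.10:9050";
    ```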
- 语法: - 1) 增加节点(不使用多租户功能则按照此方法添加) - ALTER SYSTEM ADD BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 2) 增加空闲节点(即添加不属于任何cluster的BACKEND) - ALTER SYSTEM ADD FREE BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 3) 增加节点到某个cluster - ALTER SYSTEM ADD BACKEND TO cluster_name "host:heartbeat_port"[,"host:heartbeat_port"...]; - 4) 删除节点 - ALTER SYSTEM DROP BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 5) 节点下线 - ALTER SYSTEM DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - 6) 增加Broker - ALTER SYSTEM ADD BROKER broker_name "host:port"[,"host:port"...]; - 7) 减少Broker - ALTER SYSTEM DROP BROKER broker_name "host:port"[,"host:port"...]; - 8) 删除所有Broker - ALTER SYSTEM DROP ALL BROKER broker_name - 9) 设置一个 Load error hub,用于集中展示导入时的错误信息 - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES ("key" = "value"[, ...]); - 10) 修改一个 BE 节点的属性 - ALTER SYSTEM MODIFY BACKEND "host:heartbeat_port" SET ("key" = "value"[, ...]); - - 说明: - 1) host 可以是主机名或者ip地址 - 2) heartbeat_port 为该节点的心跳端口 - 3) 增加和删除节点为同步操作。这两种操作不考虑节点上已有的数据,节点直接从元数据中删除,请谨慎使用。 - 4) 节点下线操作用于安全下线节点。该操作为异步操作。如果成功,节点最终会从元数据中删除。如果失败,则不会完成下线。 - 5) 可以手动取消节点下线操作。详见 CANCEL DECOMMISSION - 6) Load error hub: - 当前支持两种类型的 Hub:Mysql 和 Broker。需在 PROPERTIES 中指定 "type" = "mysql" 或 "type" = "broker"。 - 如果需要删除当前的 load error hub,可以将 type 设为 null。 - 1) 当使用 Mysql 类型时,导入时产生的错误信息将会插入到指定的 mysql 库表中,之后可以通过 show load warnings 语句直接查看错误信息。 - - Mysql 类型的 Hub 需指定以下参数: - host:mysql host - port:mysql port - user:mysql user - password:mysql password - database:mysql database - table:mysql table - - 2) 当使用 Broker 类型时,导入时产生的错误信息会形成一个文件,通过 broker,写入到指定的远端存储系统中。须确保已经部署对应的 broker - Broker 类型的 Hub 需指定以下参数: - broker: broker 的名称 - path: 远端存储路径 - other properties: 其他访问远端存储所必须的信息,比如认证信息等。 - - 7) 修改 BE 节点属性目前支持以下属性: - - 1. tag.location:资源标签 - 2. disable_query: 查询禁用属性 - 3. disable_load: 导入禁用属性 - -## example - - 1. 增加一个节点 - ALTER SYSTEM ADD BACKEND "host:port"; - - 2. 增加一个空闲节点 - ALTER SYSTEM ADD FREE BACKEND "host:port"; - - 3. 删除两个节点 - ALTER SYSTEM DROP BACKEND "host1:port", "host2:port"; - - 4. 下线两个节点 - ALTER SYSTEM DECOMMISSION BACKEND "host1:port", "host2:port"; - - 5. 增加两个Hdfs Broker - ALTER SYSTEM ADD BROKER hdfs "host1:port", "host2:port"; - - 6. 添加一个 Mysql 类型的 load error hub - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES - ("type"= "mysql", - "host" = "192.168.1.17" - "port" = "3306", - "user" = "my_name", - "password" = "my_passwd", - "database" = "doris_load", - "table" = "load_errors" - ); - - 7. 添加一个 Broker 类型的 load error hub - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES - ("type"= "broker", - "name" = "bos", - "path" = "bos://backup-cmy/logs", - "bos_endpoint" = "http://gz.bcebos.com", - "bos_accesskey" = "069fc278xxxxxx24ddb522", - "bos_secret_accesskey"="700adb0c6xxxxxx74d59eaa980a" - ); - - 8. 删除当前的 load error hub - ALTER SYSTEM SET LOAD ERRORS HUB PROPERTIES - ("type"= "null"); - - 9. 修改 BE 的资源标签 - - ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("tag.location" = "group_a"); - - 10. 修改 BE 的查询禁用属性 - - ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("disable_query" = "true"); - - 11. 
修改 BE 的导入禁用属性 - - ALTER SYSTEM MODIFY BACKEND "host1:9050" SET ("disable_load" = "true"); - -## keyword - ALTER,SYSTEM,BACKEND,BROKER,FREE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md b/docs/zh-CN/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md deleted file mode 100644 index ec5c7bfd1b..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/CANCEL DECOMMISSION.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -{ - "title": "CANCEL DECOMMISSION", - "language": "zh-CN" -} ---- - - - -# CANCEL DECOMMISSION -## description - - 该语句用于撤销一个节点下线操作。(仅管理员使用!) - 语法: - CANCEL DECOMMISSION BACKEND "host:heartbeat_port"[,"host:heartbeat_port"...]; - -## example - - 1. 取消两个节点的下线操作: - CANCEL DECOMMISSION BACKEND "host1:port", "host2:port"; - -## keyword - CANCEL,DECOMMISSION,BACKEND - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/CREATE CLUSTER.md b/docs/zh-CN/sql-reference/sql-statements/Administration/CREATE CLUSTER.md deleted file mode 100644 index daeaf197e9..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/CREATE CLUSTER.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -{ - "title": "CREATE CLUSTER", - "language": "zh-CN" -} ---- - - - -# CREATE CLUSTER -## description - - 该语句用于新建逻辑集群 (cluster), 需要管理员权限。如果不使用多租户,直接创建一个名称为default_cluster的cluster。否则创建一个自定义名称的cluster。 - - 语法 - - CREATE CLUSTER [IF NOT EXISTS] cluster_name - - PROPERTIES ("key"="value", ...) - - IDENTIFIED BY 'password' - - 1. PROPERTIES - - 指定逻辑集群的属性 - - PROPERTIES ("instance_num" = "3") - - instance_num 逻辑集群节点树 - - 2. identified by ‘password' 每个逻辑集群含有一个superuser,创建逻辑集群时必须指定其密码 - -## example - - 1. 新建一个含有3个be节点逻辑集群 test_cluster, 并指定其superuser用户密码 - - CREATE CLUSTER test_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test'; - - 2. 新建一个含有3个be节点逻辑集群 default_cluster(不使用多租户), 并指定其superuser用户密码 - - CREATE CLUSTER default_cluster PROPERTIES("instance_num"="3") IDENTIFIED BY 'test'; - -## keyword - CREATE,CLUSTER - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/CREATE FILE.md b/docs/zh-CN/sql-reference/sql-statements/Administration/CREATE FILE.md deleted file mode 100644 index a7aa928d8e..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/CREATE FILE.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -{ - "title": "CREATE FILE", - "language": "zh-CN" -} ---- - - - -# CREATE FILE -## description - - 该语句用于创建并上传一个文件到 Doris 集群。 - 该功能通常用于管理一些其他命令中需要使用到的文件,如证书、公钥私钥等等。 - - 该命令只用 admin 权限用户可以执行。 - 某个文件都归属与某一个的 database。对 database 拥有访问权限的用户都可以使用该文件。 - - 单个文件大小限制为 1MB。 - 一个 Doris 集群最多上传 100 个文件。 - - 语法: - - CREATE FILE "file_name" [IN database] - [properties] - - 说明: - file_name: 自定义文件名。 - database: 文件归属于某一个 db,如果没有指定,则使用当前 session 的 db。 - properties 支持以下参数: - - url: 必须。指定一个文件的下载路径。当前仅支持无认证的 http 下载路径。命令执行成功后,文件将被保存在 doris 中,该 url 将不再需要。 - catalog: 必须。对文件的分类名,可以自定义。但在某些命令中,会查找指定 catalog 中的文件。比如例行导入中的,数据源为 kafka 时,会查找 catalog 名为 kafka 下的文件。 - md5: 可选。文件的 md5。如果指定,会在下载文件后进行校验。 - -## example - - 1. 创建文件 ca.pem ,分类为 kafka - - CREATE FILE "ca.pem" - PROPERTIES - ( - "url" = "https://test.bj.bcebos.com/kafka-key/ca.pem", - "catalog" = "kafka" - ); - - 2. 
创建文件 client.key,分类为 my_catalog - - CREATE FILE "client.key" - IN my_database - PROPERTIES - ( - "url" = "https://test.bj.bcebos.com/kafka-key/client.key", - "catalog" = "my_catalog", - "md5" = "b5bb901bf10f99205b39a46ac3557dd9" - ); - -## keyword - CREATE,FILE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/DROP CLUSTER.md b/docs/zh-CN/sql-reference/sql-statements/Administration/DROP CLUSTER.md deleted file mode 100644 index 6b2875f996..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/DROP CLUSTER.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "DROP CLUSTER", - "language": "zh-CN" -} ---- - - - -# DROP CLUSTER -## description - - 该语句用于删除逻辑集群,成功删除逻辑集群需要首先删除集群内的db,需要管理员权限 - - 语法 - - DROP CLUSTER [IF EXISTS] cluster_name - -## example - - 删除逻辑集群 test_cluster - - DROP CLUSTER test_cluster; - -## keyword - DROP,CLUSTER - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/DROP FILE.md b/docs/zh-CN/sql-reference/sql-statements/Administration/DROP FILE.md deleted file mode 100644 index 4348832ea9..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/DROP FILE.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -{ - "title": "DROP FILE", - "language": "zh-CN" -} ---- - - - -# DROP FILE -## description - - 该语句用于删除一个已上传的文件。 - - 语法: - - DROP FILE "file_name" [FROM database] - [properties] - - 说明: - file_name: 文件名。 - database: 文件归属的某一个 db,如果没有指定,则使用当前 session 的 db。 - properties 支持以下参数: - - catalog: 必须。文件所属分类。 - -## example - - 1. 删除文件 ca.pem - - DROP FILE "ca.pem" properties("catalog" = "kafka"); - -## keyword - DROP,FILE diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/ENTER.md b/docs/zh-CN/sql-reference/sql-statements/Administration/ENTER.md deleted file mode 100644 index 2af0c69e7d..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/ENTER.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "ENTER", - "language": "zh-CN" -} ---- - - - -# ENTER -## description - - 该语句用于进入一个逻辑集群, 所有创建用户、创建数据库都需要在一个逻辑集群内执行,创建后并且隶属于这个逻 - - 辑集群,需要管理员权限 - - ENTER cluster_name - -## example - - 1. 进入逻辑集群test_cluster - - ENTER test_cluster; - -## keyword - ENTER - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md b/docs/zh-CN/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md deleted file mode 100644 index 4a8098fab6..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/INSTALL PLUGIN.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -{ - "title": "INSTALL PLUGIN", - "language": "zh-CN" -} ---- - - - -# INSTALL PLUGIN -## description - - 该语句用于安装一个插件。 - - 语法 - - INSTALL PLUGIN FROM [source] [PROPERTIES ("key"="value", ...)] - - source 支持三种类型: - - 1. 指向一个 zip 文件的绝对路径。 - 2. 指向一个插件目录的绝对路径。 - 3. 指向一个 http 或 https 协议的 zip 文件下载路径 - - PROPERTIES 支持设置插件的一些配置,如设置zip文件的md5sum的值等。 - -## example - - 1. 安装一个本地 zip 文件插件: - - INSTALL PLUGIN FROM "/home/users/doris/auditdemo.zip"; - - 2. 安装一个本地目录中的插件: - - INSTALL PLUGIN FROM "/home/users/doris/auditdemo/"; - - 3. 下载并安装一个插件: - - INSTALL PLUGIN FROM "http://mywebsite.com/plugin.zip"; - - 4. 
下载并安装一个插件,同时设置了zip文件的md5sum的值: - - INSTALL PLUGIN FROM "http://mywebsite.com/plugin.zip" PROPERTIES("md5sum" = "73877f6029216f4314d712086a146570"); - -## keyword - INSTALL,PLUGIN diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/LINK DATABASE.md b/docs/zh-CN/sql-reference/sql-statements/Administration/LINK DATABASE.md deleted file mode 100644 index d86e43a682..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/LINK DATABASE.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -{ - "title": "LINK DATABASE", - "language": "zh-CN" -} ---- - - - -# LINK DATABASE -## description - - (已废弃!!!) - 该语句用户链接一个逻辑集群的数据库到另外一个逻辑集群, 一个数据库只允许同时被链接一次,删除链接的数据库 - - 并不会删除数据,并且被链接的数据库不能被删除, 需要管理员权限 - - 语法 - - LINK DATABASE src_cluster_name.src_db_name des_cluster_name.des_db_name - -## example - - 1. 链接test_clusterA中的test_db到test_clusterB,并命名为link_test_db - - LINK DATABASE test_clusterA.test_db test_clusterB.link_test_db; - - 2. 删除链接的数据库link_test_db - - DROP DATABASE link_test_db; - -## keyword - LINK,DATABASE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md b/docs/zh-CN/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md deleted file mode 100644 index 18fba71e8f..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/MIGRATE DATABASE.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -{ - "title": "MIGRATE DATABASE", - "language": "zh-CN" -} ---- - - - -# MIGRATE DATABASE -## description - - (已废弃!!!) - 该语句用于迁移一个逻辑集群的数据库到另外一个逻辑集群,执行此操作前数据库必须已经处于链接状态, 需要管理 - - 员权限 - - 语法 - - MIGRATE DATABASE src_cluster_name.src_db_name des_cluster_name.des_db_name - -## example - - 1. 迁移test_clusterA中的test_db到test_clusterB - - MIGRATE DATABASE test_clusterA.test_db test_clusterB.link_test_db; - -## keyword - MIGRATE,DATABASE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md deleted file mode 100644 index cd3b600880..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SET LDAP_ADMIN_PASSWORD.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "SET LDAP_ADMIN_PASSWORD", - "language": "zh-CN" -} ---- - - - -# SET LDAP_ADMIN_PASSWORD -## description - -Syntax: - - SET LDAP_ADMIN_PASSWORD = 'plain password' - - SET LDAP_ADMIN_PASSWORD 命令用于设置LDAP管理员密码。使用LDAP认证时,doris需使用管理员账户和密码来向LDAP服务查询登录用户的信息。 - -## example - -1. 设置LDAP管理员密码 -``` -SET LDAP_ADMIN_PASSWORD = '123456' -``` - -## keyword - SET, LDAP, LDAP_ADMIN_PASSWORD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW BACKENDS.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW BACKENDS.md deleted file mode 100644 index 5cdbdeb377..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW BACKENDS.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "SHOW BACKENDS", - "language": "zh-CN" -} ---- - - - -# SHOW BACKENDS -## description - 该语句用于查看 cluster 内的 BE 节点 - 语法: - SHOW BACKENDS; - - 说明: - 1. LastStartTime 表示最近一次 BE 启动时间。 - 2. LastHeartbeat 表示最近一次心跳。 - 3. Alive 表示节点是否存活。 - 4. SystemDecommissioned 为 true 表示节点正在安全下线中。 - 5. ClusterDecommissioned 为 true 表示节点正在冲当前cluster中下线。 - 6. TabletNum 表示该节点上分片数量。 - 7. DataUsedCapacity 表示实际用户数据所占用的空间。 - 8. AvailCapacity 表示磁盘的可使用空间。 - 9. TotalCapacity 表示总磁盘空间。TotalCapacity = AvailCapacity + DataUsedCapacity + 其他非用户数据文件占用空间。 - 10. UsedPct 表示磁盘已使用量百分比。 - 11. ErrMsg 用于显示心跳失败时的错误信息。 - 12. 
Status 用于以 JSON 格式显示BE的一些状态信息, 目前包括最后一次BE汇报其tablet的时间信息。 - -## keyword - SHOW, BACKENDS - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW BROKER.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW BROKER.md deleted file mode 100644 index 8dd0ae6401..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW BROKER.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -{ - "title": "SHOW BROKER", - "language": "zh-CN" -} ---- - - - -# SHOW BROKER -## description - 该语句用于查看当前存在的 broker - 语法: - SHOW BROKER; - - 说明: - 1. LastStartTime 表示最近一次 BE 启动时间。 - 2. LastHeartbeat 表示最近一次心跳。 - 3. Alive 表示节点是否存活。 - 4. ErrMsg 用于显示心跳失败时的错误信息。 - -## keyword - SHOW, BROKER - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FILE.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FILE.md deleted file mode 100644 index 6787e80b9f..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FILE.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -{ - "title": "SHOW FILE", - "language": "zh-CN" -} ---- - - - -# SHOW FILE -## description - - 该语句用于展示一个 database 内创建的文件 - - 语法: - - SHOW FILE [FROM database]; - - 说明: - - FileId: 文件ID,全局唯一 - DbName: 所属数据库名称 - Catalog: 自定义分类 - FileName: 文件名 - FileSize: 文件大小,单位字节 - MD5: 文件的 MD5 - -## example - - 1. 查看数据库 my_database 中已上传的文件 - - SHOW FILE FROM my_database; - -## keyword - SHOW,FILE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md deleted file mode 100644 index db0077456a..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FRONTENDS.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "SHOW FRONTENDS", - "language": "zh-CN" -} ---- - - - -# SHOW FRONTENDS -## description - 该语句用于查看 FE 节点 - 语法: - SHOW FRONTENDS; - - 说明: - 1. name 表示该 FE 节点在 bdbje 中的名称。 - 2. Join 为 true 表示该节点曾经加入过集群。但不代表当前还在集群内(可能已失联) - 3. Alive 表示节点是否存活。 - 4. ReplayedJournalId 表示该节点当前已经回放的最大元数据日志id。 - 5. LastHeartbeat 是最近一次心跳。 - 6. IsHelper 表示该节点是否是 bdbje 中的 helper 节点。 - 7. ErrMsg 用于显示心跳失败时的错误信息。 - -## keyword - SHOW, FRONTENDS - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md deleted file mode 100644 index 12194c58e9..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW FULL COLUMNS.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -{ - "title": "SHOW FULL COLUMNS", - "language": "zh-CN" -} ---- - - - -# SHOW FULL COLUMNS -## description - 该语句用于指定表的列信息 - 语法: - SHOW FULL COLUMNS FROM tbl; - -## example - 1. 查看指定表的列信息 - - SHOW FULL COLUMNS FROM tbl; - -## keyword - - SHOW,TABLE,STATUS diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW INDEX.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW INDEX.md deleted file mode 100644 index 81bba8c08d..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW INDEX.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "SHOW INDEX", - "language": "zh-CN" -} ---- - - - -# SHOW INDEX - -## description - - 该语句用于展示一个表中索引的相关信息,目前只支持bitmap 索引 - 语法: - SHOW INDEX[ES] FROM [db_name.]table_name [FROM database]; - 或者 - SHOW KEY[S] FROM [db_name.]table_name [FROM database]; - -## example - - 1. 
展示指定 table_name 的下索引 - SHOW INDEX FROM example_db.table_name; - -## keyword - - SHOW,INDEX diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md deleted file mode 100644 index a8b80affe9..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW MIGRATIONS.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -{ - "title": "SHOW MIGRATIONS", - "language": "zh-CN" -} ---- - - - -# SHOW MIGRATIONS -## description - - 该语句用于查看数据库迁移的进度 - - 语法 - - SHOW MIGRATIONS - -## keyword - SHOW,MIGRATIONS - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW PLUGINS.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW PLUGINS.md deleted file mode 100644 index 073ac4b7a9..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW PLUGINS.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "SHOW PLUGINS", - "language": "zh-CN" -} ---- - - - -# SHOW PLUGINS -## description - - 该语句用于展示已安装的插件。 - - 语法 - - SHOW PLUGINS; - - 该命令会展示所有用户安装的和系统内置的插件。 - -## example - - 1. 展示已安装的插件: - - SHOW PLUGINS; - -## keyword - SHOW, PLUGIN, PLUGINS \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md deleted file mode 100644 index 8534d69f21..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW TABLE STATUS.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -{ - "title": "SHOW TABLE STATUS", - "language": "zh-CN" -} ---- - - - -# SHOW TABLE STATUS -## description - 该语句用于查看 Table 的一些信息。 - 语法: - SHOW TABLE STATUS - [FROM db] [LIKE "pattern"] - - 说明: - 1. 该语句主要用于兼容 MySQL 语法,目前仅显示 Comment 等少量信息 - -## example - 1. 查看当前数据库下所有表的信息 - - SHOW TABLE STATUS; - - 2. 查看指定数据库下,名称包含 example 的表的信息 - - SHOW TABLE STATUS FROM db LIKE "%example%"; - -## keyword - - SHOW,TABLE,STATUS diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW TRASH.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW TRASH.md deleted file mode 100644 index 5f2d3dfb78..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW TRASH.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "SHOW TRASH", - "language": "zh-CN" -} ---- - - - -# SHOW TRASH -## description - 该语句用于查看 backend 内的垃圾数据占用空间。 - 语法: - SHOW TRASH [ON BackendHost:BackendHeartBeatPort]; - - 说明: - 1. Backend 格式为该节点的BackendHost:BackendHeartBeatPort。 - 2. TrashUsedCapacity 表示该节点垃圾数据占用空间。 - -## example - - 1. 查看所有be节点的垃圾数据占用空间。 - - SHOW TRASH; - - 2. 查看'192.168.0.1:9050'的垃圾数据占用空间(会显示具体磁盘信息)。 - - SHOW TRASH ON "192.168.0.1:9050"; - -## keyword - SHOW, TRASH - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW VIEW.md b/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW VIEW.md deleted file mode 100644 index 907104f2ec..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/SHOW VIEW.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "SHOW DELETE", - "language": "zh-CN" -} ---- - - - -# SHOW VIEW -## description - 该语句用于展示基于给定表建立的所有视图 - 语法: - SHOW VIEW { FROM | IN } table [ FROM db ] - -## example - 1. 
展示基于表 testTbl 建立的所有视图 view - SHOW VIEW FROM testTbl; - -## keyword - SHOW,VIEW - diff --git a/docs/zh-CN/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md b/docs/zh-CN/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md deleted file mode 100644 index 53dff3ce2c..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Administration/UNINSTALL PLUGIN.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -{ - "title": "UNINSTALL PLUGIN", - "language": "zh-CN" -} ---- - - - -# UNINSTALL PLUGIN -## description - - 该语句用于卸载一个插件。 - - 语法 - - UNINSTALL PLUGIN plugin_name; - - plugin_name 可以通过 `SHOW PLUGINS;` 命令查看。 - - 只能卸载非 builtin 的插件。 - -## example - - 1. 卸载一个插件: - - UNINSTALL PLUGIN auditdemo; - -## keyword - UNINSTALL,PLUGIN \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md deleted file mode 100644 index 29740925dd..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER DATABASE.md +++ /dev/null @@ -1,62 +0,0 @@ ---- -{ - "title": "ALTER DATABASE", - "language": "zh-CN" -} ---- - - - -# ALTER DATABASE -## description - 该语句用于设置指定数据库的属性。(仅管理员使用) - 语法: - 1) 设置数据库数据量配额,单位为B/K/KB/M/MB/G/GB/T/TB/P/PB - ALTER DATABASE db_name SET DATA QUOTA quota; - - 2) 重命名数据库 - ALTER DATABASE db_name RENAME new_db_name; - - 3) 设置数据库的副本数量配额 - ALTER DATABASE db_name SET REPLICA QUOTA quota; - - 说明: - 重命名数据库后,如需要,请使用 REVOKE 和 GRANT 命令修改相应的用户权限。 - 数据库的默认数据量配额为1024GB,默认副本数量配额为1073741824。 - -## example - 1. 设置指定数据库数据量配额 - ALTER DATABASE example_db SET DATA QUOTA 10995116277760; - 上述单位为字节,等价于 - ALTER DATABASE example_db SET DATA QUOTA 10T; - - ALTER DATABASE example_db SET DATA QUOTA 100G; - - ALTER DATABASE example_db SET DATA QUOTA 200M; - - 2. 将数据库 example_db 重命名为 example_db2 - ALTER DATABASE example_db RENAME example_db2; - - 3. 设定指定数据库副本数量配额 - ALTER DATABASE example_db SET REPLICA QUOTA 102400; - -## keyword - ALTER,DATABASE,RENAME - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md deleted file mode 100644 index 79cd27861f..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER RESOURCE.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -{ - "title": "ALTER RESOURCE", - "language": "zh-CN" -} ---- - - - -# ALTER RESOURCE - -## Description - - 该语句用于修改一个已有的资源。仅 root 或 admin 用户可以修改资源。 - 语法: - ALTER RESOURCE 'resource_name' - PROPERTIES ("key"="value", ...); - - 注意:resource type 不支持修改。 - -## Example - - 1. 修改名为 spark0 的 Spark 资源的工作目录: - ALTER RESOURCE 'spark0' PROPERTIES ("working_dir" = "hdfs://127.0.0.1:10000/tmp/doris_new"); - - 2. 
修改名为 remote_s3 的 S3 资源的最大连接数: - ALTER RESOURCE 'remote_s3' PROPERTIES ("s3_max_connections" = "100"); - -## keyword - - ALTER, RESOURCE diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER TABLE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER TABLE.md deleted file mode 100644 index 5201cea9b8..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER TABLE.md +++ /dev/null @@ -1,441 +0,0 @@ ---- -{ - "title": "ALTER TABLE", - "language": "zh-CN" -} ---- - - - -# ALTER TABLE - -## description - - 该语句用于对已有的 table 进行修改。如果没有指定 rollup index,默认操作 base index。 - 该语句分为三种操作类型: schema change 、rollup 、partition - 这三种操作类型不能同时出现在一条 ALTER TABLE 语句中。 - 其中 schema change 和 rollup 是异步操作,任务提交成功则返回。之后可使用 SHOW ALTER 命令查看进度。 - partition 是同步操作,命令返回表示执行完毕。 - - 语法: - ALTER TABLE [database.]table - alter_clause1[, alter_clause2, ...]; - - alter_clause 分为 partition 、rollup、schema change、rename 和index五种。 - - partition 支持如下几种修改方式 - 1. 增加分区 - 语法: - ADD PARTITION [IF NOT EXISTS] partition_name - partition_desc ["key"="value"] - [DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]] - 注意: - 1) partition_desc 支持以下两种写法: - * VALUES LESS THAN [MAXVALUE|("value1", ...)] - * VALUES [("value1", ...), ("value1", ...)) - 1) 分区为左闭右开区间,如果用户仅指定右边界,系统会自动确定左边界 - 2) 如果没有指定分桶方式,则自动使用建表使用的分桶方式 - 3) 如指定分桶方式,只能修改分桶数,不可修改分桶方式或分桶列 - 4) ["key"="value"] 部分可以设置分区的一些属性,具体说明见 CREATE TABLE - 5) 如果建表时用户未显式创建Partition,则不支持通过ALTER的方式增加分区 - - 2. 删除分区 - 语法: - DROP PARTITION [IF EXISTS] partition_name [FORCE] - 注意: - 1) 使用分区方式的表至少要保留一个分区。 - 2) 执行 DROP PARTITION 一段时间内,可以通过 RECOVER 语句恢复被删除的分区。详见 RECOVER 语句 - 3) 如果执行 DROP PARTITION FORCE,则系统不会检查该分区是否存在未完成的事务,分区将直接被删除并且不能被恢复,一般不建议执行此操作 - - 3. 修改分区属性 - 语法: - MODIFY PARTITION p1|(p1[, p2, ...]) SET ("key" = "value", ...) - 说明: - 1) 当前支持修改分区的下列属性: - - storage_medium - - storage_cooldown_time - - remote_storage_cooldown_time - - replication_num - — in_memory - 2) 对于单分区表,partition_name 同表名。 - - rollup 支持如下几种创建方式: - 1. 创建 rollup index - 语法: - ADD ROLLUP rollup_name (column_name1, column_name2, ...) - [FROM from_index_name] - [PROPERTIES ("key"="value", ...)] - - properties: 支持设置超时时间,默认超时时间为1天。 - 例子: - ADD ROLLUP r1(col1,col2) from r0 - 1.2 批量创建 rollup index - 语法: - ADD ROLLUP [rollup_name (column_name1, column_name2, ...) - [FROM from_index_name] - [PROPERTIES ("key"="value", ...)],...] - 例子: - ADD ROLLUP r1(col1,col2) from r0, r2(col3,col4) from r0 - 1.3 注意: - 1) 如果没有指定 from_index_name,则默认从 base index 创建 - 2) rollup 表中的列必须是 from_index 中已有的列 - 3) 在 properties 中,可以指定存储格式。具体请参阅 CREATE TABLE - - 2. 删除 rollup index - 语法: - DROP ROLLUP rollup_name [PROPERTIES ("key"="value", ...)] - 例子: - DROP ROLLUP r1 - 2.1 批量删除 rollup index - 语法:DROP ROLLUP [rollup_name [PROPERTIES ("key"="value", ...)],...] - 例子:DROP ROLLUP r1,r2 - 2.2 注意: - 1) 不能删除 base index - - schema change 支持如下几种修改方式: - 1. 向指定 index 的指定位置添加一列 - 语法: - ADD COLUMN column_name column_type [KEY | agg_type] [DEFAULT "default_value"] - [AFTER column_name|FIRST] - [TO rollup_index_name] - [PROPERTIES ("key"="value", ...)] - 注意: - 1) 聚合模型如果增加 value 列,需要指定 agg_type - 2) 非聚合模型(如 DUPLICATE KEY)如果增加key列,需要指定KEY关键字 - 3) 不能在 rollup index 中增加 base index 中已经存在的列 - 如有需要,可以重新创建一个 rollup index) - - 2. 向指定 index 添加多列 - 语法: - ADD COLUMN (column_name1 column_type [KEY | agg_type] DEFAULT "default_value", ...) - [TO rollup_index_name] - [PROPERTIES ("key"="value", ...)] - 注意: - 1) 聚合模型如果增加 value 列,需要指定agg_type - 2) 非聚合模型如果增加key列,需要指定KEY关键字 - 3) 不能在 rollup index 中增加 base index 中已经存在的列 - (如有需要,可以重新创建一个 rollup index) - - 3. 
从指定 index 中删除一列 - 语法: - DROP COLUMN column_name - [FROM rollup_index_name] - 注意: - 1) 不能删除分区列 - 2) 如果是从 base index 中删除列,则如果 rollup index 中包含该列,也会被删除 - - 4. 修改指定 index 的列类型以及列位置 - 语法: - MODIFY COLUMN column_name column_type [KEY | agg_type] [NULL | NOT NULL] [DEFAULT "default_value"] - [AFTER column_name|FIRST] - [FROM rollup_index_name] - [PROPERTIES ("key"="value", ...)] - 注意: - 1) 聚合模型如果修改 value 列,需要指定 agg_type - 2) 非聚合类型如果修改key列,需要指定KEY关键字 - 3) 只能修改列的类型,列的其他属性维持原样(即其他属性需在语句中按照原属性显式的写出,参见 example 8) - 4) 分区列和分桶列不能做任何修改 - 5) 目前支持以下类型的转换(精度损失由用户保证) - TINYINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE 类型向范围更大的数字类型转换 - TINTINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE/DECIMAL 转换成 VARCHAR - VARCHAR 支持修改最大长度 - VARCHAR/CHAR 转换成 TINTINT/SMALLINT/INT/BIGINT/LARGEINT/FLOAT/DOUBLE - VARCHAR/CHAR 转换成 DATE (目前支持"%Y-%m-%d", "%y-%m-%d", "%Y%m%d", "%y%m%d", "%Y/%m/%d, "%y/%m/%d"六种格式化格式) - DATETIME 转换成 DATE(仅保留年-月-日信息, 例如: `2019-12-09 21:47:05` <--> `2019-12-09`) - DATE 转换成 DATETIME(时分秒自动补零, 例如: `2019-12-09` <--> `2019-12-09 00:00:00`) - FLOAT 转换成 DOUBLE - INT 转换成 DATE (如果INT类型数据不合法则转换失败,原始数据不变) - 6) 不支持从NULL转为NOT NULL - - 5. 对指定 index 的列进行重新排序 - 语法: - ORDER BY (column_name1, column_name2, ...) - [FROM rollup_index_name] - [PROPERTIES ("key"="value", ...)] - 注意: - 1) index 中的所有列都要写出来 - 2) value 列在 key 列之后 - - 6. 修改table的属性,目前支持修改bloom filter列, colocate_with 属性和dynamic_partition属性,replication_num和default.replication_num属性 - 语法: - PROPERTIES ("key"="value") - 注意: - 也可以合并到上面的schema change操作中来修改,见下面例子 - - 7. 启用批量删除支持 - 语法: - ENABLE FEATURE "BATCH_DELETE" - 注意: - 1) 只能用在unique 表 - 2) 用于旧表支持批量删除功能,新表创建时已经支持 - - 8. 启用按照sequence column的值来保证导入顺序的功能 - 语法: - ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date") - 注意: - 1)只能用在unique表 - 2) sequence_type用来指定sequence列的类型,可以为整型和时间类型 - 3) 只支持新导入数据的有序性,历史数据无法更改 - - 9. 修改表的分区默认分桶数 - 语法: - MODIFY DISTRIBUTION DISTRIBUTED BY HASH (k1[,k2 ...]) BUCKETS num - 注意: - 1)只能用在分区类型为RANGE,采用哈希分桶的非colocate表 - - 10. 修改表注释 - 语法: - MODIFY COMMENT "new table comment" - - 11. 修改列注释 - 语法: - MODIFY COLUMN col1 COMMENT "new column comment" - - 12. 修改引擎类型 - - 仅支持将 MySQL 类型修改为 ODBC 类型。driver 的值为 odbc.init 配置中的 driver 名称。 - - 语法: - MODIFY ENGINE TO odbc PROPERTIES("driver" = "MySQL"); - - rename 支持对以下名称进行修改: - 1. 修改表名 - 语法: - RENAME new_table_name; - - 2. 修改 rollup index 名称 - 语法: - RENAME ROLLUP old_rollup_name new_rollup_name; - - 3. 修改 partition 名称 - 语法: - RENAME PARTITION old_partition_name new_partition_name; - - replace 支持对两个表进行原子的替换操作: - 1. 替换表 - 语法: - REPLACE WITH TABLE new_table [PROPERTIES('swap' = 'true')]; - 注意: - 1. 如果 swap 参数为 true,两表交换数据。 - 2. 如果 swap 参数为 false,用 new_table 表中的数据替换 old_table 表中的数据,并删除 new_table 表。 - - bitmap index 支持如下几种修改方式 - 1. 创建bitmap 索引 - 语法: - ADD INDEX [IF NOT EXISTS] index_name (column [, ...],) [USING BITMAP] [COMMENT 'balabala']; - 注意: - 1. 目前仅支持bitmap 索引 - 2. BITMAP 索引仅在单列上创建 - - 2. 删除索引 - 语法: - DROP INDEX [IF EXISTS] index_name; - -## example - - [table] - 1. 修改表的默认副本数量, 新建分区副本数量默认使用此值 - ALTER TABLE example_db.my_table - SET ("default.replication_num" = "2"); - - 2. 修改单分区表的实际副本数量(只限单分区表) - ALTER TABLE example_db.my_table - SET ("replication_num" = "3"); - - [partition] - 1. 增加分区, 现有分区 [MIN, 2013-01-01),增加分区 [2013-01-01, 2014-01-01),使用默认分桶方式 - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES LESS THAN ("2014-01-01"); - - 2. 增加分区,使用新的分桶数 - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES LESS THAN ("2015-01-01") - DISTRIBUTED BY HASH(k1) BUCKETS 20; - - 3. 
增加分区,使用新的副本数 - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES LESS THAN ("2015-01-01") - ("replication_num"="1"); - - 4. 修改分区副本数 - ALTER TABLE example_db.my_table - MODIFY PARTITION p1 SET("replication_num"="1"); - - 5. 批量修改指定分区 - ALTER TABLE example_db.my_table - MODIFY PARTITION (p1, p2, p4) SET("in_memory"="true"); - - 6. 批量修改所有分区 - ALTER TABLE example_db.my_table - MODIFY PARTITION (*) SET("storage_medium"="HDD"); - - 7. 删除分区 - ALTER TABLE example_db.my_table - DROP PARTITION p1; - - 8. 增加一个指定上下界的分区 - - ALTER TABLE example_db.my_table - ADD PARTITION p1 VALUES [("2014-01-01"), ("2014-02-01")); - - [rollup] - 1. 创建 index: example_rollup_index,基于 base index(k1,k2,k3,v1,v2)。列式存储。 - ALTER TABLE example_db.my_table - ADD ROLLUP example_rollup_index(k1, k3, v1, v2); - - 2. 创建 index: example_rollup_index2,基于 example_rollup_index(k1,k3,v1,v2) - ALTER TABLE example_db.my_table - ADD ROLLUP example_rollup_index2 (k1, v1) - FROM example_rollup_index; - - 3. 创建 index: example_rollup_index3, 基于 base index (k1,k2,k3,v1), 自定义 rollup 超时时间一小时。 - ALTER TABLE example_db.my_table - ADD ROLLUP example_rollup_index(k1, k3, v1) - PROPERTIES("timeout" = "3600"); - - 4. 删除 index: example_rollup_index2 - ALTER TABLE example_db.my_table - DROP ROLLUP example_rollup_index2; - - - - [schema change] - 1. 向 example_rollup_index 的 col1 后添加一个key列 new_col(非聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN new_col INT KEY DEFAULT "0" AFTER col1 - TO example_rollup_index; - - 2. 向example_rollup_index的col1后添加一个value列new_col(非聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN new_col INT DEFAULT "0" AFTER col1 - TO example_rollup_index; - - 3. 向example_rollup_index的col1后添加一个key列new_col(聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN new_col INT DEFAULT "0" AFTER col1 - TO example_rollup_index; - - 4. 向example_rollup_index的col1后添加一个value列new_col SUM聚合类型(聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN new_col INT SUM DEFAULT "0" AFTER col1 - TO example_rollup_index; - - 5. 向 example_rollup_index 添加多列(聚合模型) - ALTER TABLE example_db.my_table - ADD COLUMN (col1 INT DEFAULT "1", col2 FLOAT SUM DEFAULT "2.3") - TO example_rollup_index; - - 6. 从 example_rollup_index 删除一列 - ALTER TABLE example_db.my_table - DROP COLUMN col2 - FROM example_rollup_index; - - 7. 修改 base index 的 key 列 col1 的类型为 BIGINT,并移动到 col2 列后面 - (*注意,无论是修改 key 列还是 value 列都需要声明完整的 column 信息*) 例如:MODIFY COLUMN xxx COLUMNTYPE [KEY|agg_type] - ALTER TABLE example_db.my_table - MODIFY COLUMN col1 BIGINT KEY DEFAULT "1" AFTER col2; - - 8. 修改 base index 的 val1 列最大长度。原 val1 为 (val1 VARCHAR(32) REPLACE DEFAULT "abc") - ALTER TABLE example_db.my_table - MODIFY COLUMN val1 VARCHAR(64) REPLACE DEFAULT "abc"; - - 9. 重新排序 example_rollup_index 中的列(设原列顺序为:k1,k2,k3,v1,v2) - ALTER TABLE example_db.my_table - ORDER BY (k3,k1,k2,v2,v1) - FROM example_rollup_index; - - 10. 同时执行两种操作 - ALTER TABLE example_db.my_table - ADD COLUMN v2 INT MAX DEFAULT "0" AFTER k2 TO example_rollup_index, - ORDER BY (k3,k1,k2,v2,v1) FROM example_rollup_index; - - 11. 修改表的 bloom filter 列 - ALTER TABLE example_db.my_table SET ("bloom_filter_columns"="k1,k2,k3"); - - 也可以合并到上面的 schema change 操作中(注意多子句的语法有少许区别) - ALTER TABLE example_db.my_table - DROP COLUMN col2 - PROPERTIES ("bloom_filter_columns"="k1,k2,k3"); - - 12. 修改表的Colocate 属性 - - ALTER TABLE example_db.my_table set ("colocate_with" = "t1"); - - 13. 将表的分桶方式由 Hash Distribution 改为 Random Distribution - - ALTER TABLE example_db.my_table set ("distribution_type" = "random"); - - 14. 
修改表的动态分区属性(支持未添加动态分区属性的表添加动态分区属性) - ALTER TABLE example_db.my_table set ("dynamic_partition.enable" = "false"); - - 如果需要在未添加动态分区属性的表中添加动态分区属性,则需要指定所有的动态分区属性 - (注:非分区表不支持添加动态分区属性) - ALTER TABLE example_db.my_table set ("dynamic_partition.enable" = "true", "dynamic_partition.time_unit" = "DAY", "dynamic_partition.end" = "3", "dynamic_partition.prefix" = "p", "dynamic_partition.buckets" = "32"); - - 15. 修改表的 in_memory 属性 - - ALTER TABLE example_db.my_table set ("in_memory" = "true"); - 16. 启用 批量删除功能 - ALTER TABLE example_db.my_table ENABLE FEATURE "BATCH_DELETE" - 17. 启用按照sequence column的值来保证导入顺序的功能 - - ALTER TABLE example_db.my_table ENABLE FEATURE "SEQUENCE_LOAD" WITH PROPERTIES ("function_column.sequence_type" = "Date") - - 18. 将表的默认分桶数改为50 - - ALTER TABLE example_db.my_table MODIFY DISTRIBUTION DISTRIBUTED BY HASH(k1) BUCKETS 50; - - 19. 修改表注释 - - ALTER TABLE example_db.my_table MODIFY COMMENT "new comment"; - - 20. 修改列注释 - - ALTER TABLE example_db.my_table MODIFY COLUMN k1 COMMENT "k1", MODIFY COLUMN k2 COMMENT "k2"; - - 21. 修改引擎类型 - - ALTER TABLE example_db.mysql_table MODIFY ENGINE TO odbc PROPERTIES("driver" = "MySQL"); - - [rename] - 1. 将名为 table1 的表修改为 table2 - ALTER TABLE table1 RENAME table2; - - 2. 将表 example_table 中名为 rollup1 的 rollup index 修改为 rollup2 - ALTER TABLE example_table RENAME ROLLUP rollup1 rollup2; - - 3. 将表 example_table 中名为 p1 的 partition 修改为 p2 - ALTER TABLE example_table RENAME PARTITION p1 p2; - - [replace] - 1. 将 tabel1 和 table2 的数据互换 - ALTER TABLE table1 REPLACE WITH TABLE table2; - 2. 用 table2 表中的数据替换 tabel1 表中的数据, 并删除 table2 - ALTER TABLE table1 REPLACE WITH TABLE table2 PROPERTIES('swap' = 'false'); - - [index] - 1. 在table1 上为siteid 创建bitmap 索引 - ALTER TABLE table1 ADD INDEX [IF NOT EXISTS] index_name (siteid) [USING BITMAP] COMMENT 'balabala'; - 2. 删除table1 上的siteid列的bitmap 索引 - ALTER TABLE table1 DROP INDEX [IF EXISTS] index_name; - -## keyword - - ALTER,TABLE,ROLLUP,COLUMN,PARTITION,RENAME diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER VIEW.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER VIEW.md deleted file mode 100644 index 6969f1a6f7..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/ALTER VIEW.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -{ - "title": "ALTER VIEW", - "language": "zh-CN" -} ---- - - - -# ALTER VIEW -## description - 该语句用于修改一个view的定义 - 语法: - ALTER VIEW - [db_name.]view_name - (column1[ COMMENT "col comment"][, column2, ...]) - AS query_stmt - - 说明: - 1. 视图都是逻辑上的,其中的数据不会存储在物理介质上,在查询时视图将作为语句中的子查询,因此,修改视图的定义等价于修改query_stmt。 - 2. query_stmt 为任意支持的 SQL - -## example - 1、修改example_db上的视图example_view - - ALTER VIEW example_db.example_view - ( - c1 COMMENT "column 1", - c2 COMMENT "column 2", - c3 COMMENT "column 3" - ) - AS SELECT k1, k2, SUM(v1) FROM example_table - GROUP BY k1, k2 - - \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/BACKUP.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/BACKUP.md deleted file mode 100644 index 741cb9768b..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/BACKUP.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -{ - "title": "BACKUP", - "language": "zh-CN" -} ---- - - - -# BACKUP -## description - 该语句用于备份指定数据库下的数据。该命令为异步操作。提交成功后,需通过 SHOW BACKUP 命令查看进度。仅支持备份 OLAP 类型的表。 - 语法: - BACKUP SNAPSHOT [db_name].{snapshot_name} - TO `repository_name` - [ON|EXCLUDE] ( - `table_name` [PARTITION (`p1`, ...)], - ... - ) - PROPERTIES ("key"="value", ...); - - 说明: - 1. 
同一数据库下只能有一个正在执行的 BACKUP 或 RESTORE 任务。 - 2. ON 子句中标识需要备份的表和分区。如果不指定分区,则默认备份该表的所有分区。 - 3. EXCLUDE 子句中标识不需要备份的表和分区。备份除了指定的表或分区之外这个数据库中所有表的所有分区数据。 - 4. PROPERTIES 目前支持以下属性: - "type" = "full":表示这是一次全量更新(默认)。 - "timeout" = "3600":任务超时时间,默认为一天。单位秒。 - -## example - - 1. 全量备份 example_db 下的表 example_tbl 到仓库 example_repo 中: - BACKUP SNAPSHOT example_db.snapshot_label1 - TO example_repo - ON (example_tbl) - PROPERTIES ("type" = "full"); - - 2. 全量备份 example_db 下,表 example_tbl 的 p1, p2 分区,以及表 example_tbl2 到仓库 example_repo 中: - BACKUP SNAPSHOT example_db.snapshot_label2 - TO example_repo - ON - ( - example_tbl PARTITION (p1,p2), - example_tbl2 - ); - - 3. 全量备份 example_db 下除了表 example_tbl 的其他所有表到仓库 example_repo 中: - BACKUP SNAPSHOT example_db.snapshot_label3 - TO example_repo - EXCLUDE (example_tbl); - -## keyword - BACKUP - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md deleted file mode 100644 index 843c643cc6..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL ALTER.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -{ - "title": "CANCEL ALTER", - "language": "zh-CN" -} ---- - - - -# CANCEL ALTER -## description - 该语句用于撤销一个 ALTER 操作。 - 1. 撤销 ALTER TABLE COLUMN 操作 - 语法: - CANCEL ALTER TABLE COLUMN - FROM db_name.table_name - - 2. 撤销 ALTER TABLE ROLLUP 操作 - 语法: - CANCEL ALTER TABLE ROLLUP - FROM db_name.table_name - - 3. 根据job id批量撤销rollup操作 - 语法: - CANCEL ALTER TABLE ROLLUP - FROM db_name.table_name (jobid,...) - 注意: - 该命令为异步操作,具体是否执行成功需要使用`show alter table rollup`查看任务状态确认 - 4. 撤销 ALTER CLUSTER 操作 - 语法: - (待实现...) - - -## example - [CANCEL ALTER TABLE COLUMN] - 1. 撤销针对 my_table 的 ALTER COLUMN 操作。 - CANCEL ALTER TABLE COLUMN - FROM example_db.my_table; - - [CANCEL ALTER TABLE ROLLUP] - 1. 撤销 my_table 下的 ADD ROLLUP 操作。 - CANCEL ALTER TABLE ROLLUP - FROM example_db.my_table; - - [CANCEL ALTER TABLE ROLLUP] - 1. 根据job id撤销 my_table 下的 ADD ROLLUP 操作。 - CANCEL ALTER TABLE ROLLUP - FROM example_db.my_table (12801,12802); - -## keyword - CANCEL,ALTER,TABLE,COLUMN,ROLLUP - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md deleted file mode 100644 index f9648cbab9..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL BACKUP.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "CANCEL BACKUP", - "language": "zh-CN" -} ---- - - - -# CANCEL BACKUP -## description - 该语句用于取消一个正在进行的 BACKUP 任务。 - 语法: - CANCEL BACKUP FROM db_name; - -## example - 1. 取消 example_db 下的 BACKUP 任务。 - CANCEL BACKUP FROM example_db; - -## keyword - CANCEL, BACKUP - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md deleted file mode 100644 index 5ae2052e3a..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CANCEL RESTORE.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -{ - "title": "CANCEL RESTORE", - "language": "zh-CN" -} ---- - - - -# CANCEL RESTORE -## description - 该语句用于取消一个正在进行的 RESTORE 任务。 - 语法: - CANCEL RESTORE FROM db_name; - - 注意: - 当取消处于 COMMIT 或之后阶段的恢复左右时,可能导致被恢复的表无法访问。此时只能通过再次执行恢复作业进行数据恢复。 - -## example - 1. 
取消 example_db 下的 RESTORE 任务。 - CANCEL RESTORE FROM example_db; - -## keyword - CANCEL, RESTORE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md deleted file mode 100644 index 1fac750903..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE DATABASE.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -{ - "title": "CREATE DATABASE", - "language": "zh-CN" -} ---- - - - -# CREATE DATABASE - -## Description - - 该语句用于新建数据库(database) - 语法: - CREATE DATABASE [IF NOT EXISTS] db_name - [PROPERTIES ("key"="value", ...)]; - -1. PROPERTIES - 该数据库的附加信息,可以缺省。 - 1)如果创建 Iceberg 数据库,则需要在 properties 中提供以下信息: - ``` - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ) - - ``` - 其中 `iceberg.database` 是 Iceberg 对应的库名; - `iceberg.hive.metastore.uris` 是 hive metastore 服务地址。 - `iceberg.catalog.type` 默认为 `HIVE_CATALOG`。当前仅支持 `HIVE_CATALOG`,后续会支持更多 Iceberg catalog 类型。 - -## example - 1. 新建数据库 db_test - ``` - CREATE DATABASE db_test; - ``` - - 2. 新建 Iceberg 数据库 iceberg_test - ``` - CREATE DATABASE `iceberg_test` - PROPERTIES ( - "iceberg.database" = "doris", - "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - -## keyword - CREATE,DATABASE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md deleted file mode 100644 index 1faec7c2b4..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE ENCRYPTKEY.md +++ /dev/null @@ -1,80 +0,0 @@ ---- -{ - "title": "CREATE ENCRYPTKEY", - "language": "zh-CN" -} ---- - - - -# CREATE ENCRYPTKEY - -## Description - -### Syntax - -``` -CREATE ENCRYPTKEY key_name - AS "key_string" -``` - -### Parameters - -> `key_name`: 要创建密钥的名字, 可以包含数据库的名字。比如:`db1.my_key`。 -> -> `key_string`: 要创建密钥的字符串 - -此语句创建一个自定义密钥。执行此命令需要用户拥有 `ADMIN` 权限。 - -如果 `key_name` 中包含了数据库名字,那么这个自定义密钥会创建在对应的数据库中,否则这个函数将会创建在当前会话所在的数据库。新密钥的名字不能够与对应数据库中已存在的密钥相同,否则会创建失败。 - -## Example - -1. 创建一个自定义密钥 - - ``` - CREATE ENCRYPTKEY my_key AS "ABCD123456789"; - ``` - -2. 
使用自定义密钥 - - 使用自定义密钥需在密钥前添加关键字 `KEY`/`key`,与 `key_name` 空格隔开。 - - ``` - mysql> SELECT HEX(AES_ENCRYPT("Doris is Great", KEY my_key)); - +------------------------------------------------+ - | hex(aes_encrypt('Doris is Great', key my_key)) | - +------------------------------------------------+ - | D26DB38579D6A343350EDDC6F2AD47C6 | - +------------------------------------------------+ - 1 row in set (0.02 sec) - - mysql> SELECT AES_DECRYPT(UNHEX('D26DB38579D6A343350EDDC6F2AD47C6'), KEY my_key); - +--------------------------------------------------------------------+ - | aes_decrypt(unhex('D26DB38579D6A343350EDDC6F2AD47C6'), key my_key) | - +--------------------------------------------------------------------+ - | Doris is Great | - +--------------------------------------------------------------------+ - 1 row in set (0.01 sec) - ``` - -## Keyword - - CREATE,ENCRYPTKEY diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE INDEX.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE INDEX.md deleted file mode 100644 index 9a8f051fab..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE INDEX.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -{ - "title": "CREATE INDEX", - "language": "zh-CN" -} ---- - - - -# CREATE INDEX - -## description - - 该语句用于创建索引 - 语法: - CREATE INDEX [IF NOT EXISTS] index_name ON table_name (column [, ...],) [USING BITMAP] [COMMENT'balabala']; - 注意: - 1. 目前只支持bitmap 索引 - 2. BITMAP 索引仅在单列上创建 - -## example - - 1. 在table1 上为siteid 创建bitmap 索引 - CREATE INDEX [IF NOT EXISTS] index_name ON table1 (siteid) USING BITMAP COMMENT 'balabala'; - -## keyword - - CREATE,INDEX diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md deleted file mode 100644 index 531fdd0b7b..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE MATERIALIZED VIEW.md +++ /dev/null @@ -1,237 +0,0 @@ ---- -{ - "title": "CREATE MATERIALIZED VIEW", - "language": "zh-CN" -} ---- - - - -# CREATE MATERIALIZED VIEW - -## description - - 该语句用于创建物化视图。 - -说明: - 异步语法,调用成功后仅表示创建物化视图的任务提交成功,用户需要先通过 ``` show alter table rollup ``` 来查看物化视图的创建进度。 - 在显示 FINISHED 后既可通过 ``` desc [table_name] all ``` 命令来查看物化视图的 schema 了。 - -语法: - - ``` - - CREATE MATERIALIZED VIEW [MV name] as [query] - [PROPERTIES ("key" = "value")] - - ``` - -1. MV name - - 物化视图的名称,必填项。 - - 相同表的物化视图名称不可重复。 - -2. query - - 用于构建物化视图的查询语句,查询语句的结果既物化视图的数据。目前支持的 query 格式为: - - ``` - - SELECT select_expr[, select_expr ...] - FROM [Base view name] - GROUP BY column_name[, column_name ...] - ORDER BY column_name[, column_name ...] - - 语法和查询语句语法一致。 - - ``` - - select_expr: 物化视图的 schema 中所有的列。 - + 仅支持不带表达式计算的单列,聚合列。 - + 其中聚合函数目前仅支持 SUM, MIN, MAX 三种,且聚合函数的参数只能是不带表达式计算的单列。 - + 至少包含一个单列。 - + 所有涉及到的列,均只能出现一次。 - - base view name: 物化视图的原始表名,必填项。 - + 必须是单表,且非子查询 - - group by: 物化视图的分组列,选填项。 - + 不填则数据不进行分组。 - - order by: 物化视图的排序列,选填项。 - + 排序列的声明顺序必须和 select_expr 中列声明顺序一致。 - + 如果不声明 order by,则根据规则自动补充排序列。 - 如果物化视图是聚合类型,则所有的分组列自动补充为排序列。 - 如果物化视图是非聚合类型,则前 36 个字节自动补充为排序列。如果自动补充的排序个数小于3个,则前三个作为排序列。 - + 如果 query 中包含分组列的话,则排序列必须和分组列一致。 - -3. properties - - 声明物化视图的一些配置,选填项。 - - ``` - - PROPERTIES ("key" = "value", "key" = "value" ...) 
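    -- For illustration only (placeholder values; the supported keys are described below):
    PROPERTIES ("short_key" = "2", "timeout" = "3600")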
- - ``` - - 以下几个配置,均可声明在此处: - - short_key: 排序列的个数。 - timeout: 物化视图构建的超时时间。 - -## example - -Base 表结构为 - -``` -mysql> desc duplicate_table; -+-------+--------+------+------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+-------+--------+------+------+---------+-------+ -| k1 | INT | Yes | true | N/A | | -| k2 | INT | Yes | true | N/A | | -| k3 | BIGINT | Yes | true | N/A | | -| k4 | BIGINT | Yes | true | N/A | | -+-------+--------+------+------+---------+-------+ -``` - -1. 创建一个仅包含原始表 (k1, k2)列的物化视图 - - ``` - create materialized view k1_k2 as - select k1, k2 from duplicate_table; - ``` - - 物化视图的 schema 如下图,物化视图仅包含两列 k1, k2 且不带任何聚合 - - ``` - +-----------------+-------+--------+------+------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +-----------------+-------+--------+------+------+---------+-------+ - | k1_k2 | k1 | INT | Yes | true | N/A | | - | | k2 | INT | Yes | true | N/A | | - +-----------------+-------+--------+------+------+---------+-------+ - ``` - -2. 创建一个以 k2 为排序列的物化视图 - - ``` - create materialized view k2_order as - select k2, k1 from duplicate_table order by k2; - ``` - - 物化视图的 schema 如下图,物化视图仅包含两列 k2, k1,其中 k2 列为排序列,不带任何聚合。 - - ``` - +-----------------+-------+--------+------+-------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +-----------------+-------+--------+------+-------+---------+-------+ - | k2_order | k2 | INT | Yes | true | N/A | | - | | k1 | INT | Yes | false | N/A | NONE | - +-----------------+-------+--------+------+-------+---------+-------+ - ``` - -3. 创建一个以 k1, k2 分组,k3 列为 SUM 聚合的物化视图 - - ``` - create materialized view k1_k2_sumk3 as - select k1, k2, sum(k3) from duplicate_table group by k1, k2; - ``` - - 物化视图的 schema 如下图,物化视图包含两列 k1, k2,sum(k3) 其中 k1, k2 为分组列,sum(k3) 为根据 k1, k2 分组后的 k3 列的求和值。 - - 由于物化视图没有声明排序列,且物化视图带聚合数据,系统默认补充分组列 k1, k2 为排序列。 - - ``` - +-----------------+-------+--------+------+-------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +-----------------+-------+--------+------+-------+---------+-------+ - | k1_k2_sumk3 | k1 | INT | Yes | true | N/A | | - | | k2 | INT | Yes | true | N/A | | - | | k3 | BIGINT | Yes | false | N/A | SUM | - +-----------------+-------+--------+------+-------+---------+-------+ - ``` - -4. 创建一个去除重复行的物化视图 - - ``` - create materialized view deduplicate as - select k1, k2, k3, k4 from duplicate_table group by k1, k2, k3, k4; - ``` - - 物化视图 schema 如下图,物化视图包含 k1, k2, k3, k4列,且不存在重复行。 - - ``` - +-----------------+-------+--------+------+-------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +-----------------+-------+--------+------+-------+---------+-------+ - | deduplicate | k1 | INT | Yes | true | N/A | | - | | k2 | INT | Yes | true | N/A | | - | | k3 | BIGINT | Yes | true | N/A | | - | | k4 | BIGINT | Yes | true | N/A | | - +-----------------+-------+--------+------+-------+---------+-------+ - - ``` - -5. 
创建一个不声明排序列的非聚合型物化视图 - - all_type_table 的 schema 如下: - - ``` - +-------+--------------+------+-------+---------+-------+ - | Field | Type | Null | Key | Default | Extra | - +-------+--------------+------+-------+---------+-------+ - | k1 | TINYINT | Yes | true | N/A | | - | k2 | SMALLINT | Yes | true | N/A | | - | k3 | INT | Yes | true | N/A | | - | k4 | BIGINT | Yes | true | N/A | | - | k5 | DECIMAL(9,0) | Yes | true | N/A | | - | k6 | DOUBLE | Yes | false | N/A | NONE | - | k7 | VARCHAR(20) | Yes | false | N/A | NONE | - +-------+--------------+------+-------+---------+-------+ - ``` - - 物化视图包含 k3, k4, k5, k6, k7 列,且不声明排序列,则创建语句如下: - - ``` - create materialized view mv_1 as - select k3, k4, k5, k6, k7 from all_type_table; - ``` - - 系统默认补充的排序列为 k3, k4, k5 三列。这三列类型的字节数之和为 4(INT) + 8(BIGINT) + 16(DECIMAL) = 28 < 36。所以补充的是这三列作为排序列。 - 物化视图的 schema 如下,可以看到其中 k3, k4, k5 列的 key 字段为 true,也就是排序列。k6, k7 列的 key 字段为 false,也就是非排序列。 - - ``` - +----------------+-------+--------------+------+-------+---------+-------+ - | IndexName | Field | Type | Null | Key | Default | Extra | - +----------------+-------+--------------+------+-------+---------+-------+ - | mv_1 | k3 | INT | Yes | true | N/A | | - | | k4 | BIGINT | Yes | true | N/A | | - | | k5 | DECIMAL(9,0) | Yes | true | N/A | | - | | k6 | DOUBLE | Yes | false | N/A | NONE | - | | k7 | VARCHAR(20) | Yes | false | N/A | NONE | - +----------------+-------+--------------+------+-------+---------+-------+ - ``` - - -## keyword - CREATE, MATERIALIZED, VIEW diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md deleted file mode 100644 index 02b429c1ea..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE REPOSITORY.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -{ - "title": "CREATE REPOSITORY", - "language": "zh-CN" -} ---- - - - -# CREATE REPOSITORY -## description - 该语句用于创建仓库。仓库用于属于备份或恢复。仅 root 或 superuser 用户可以创建仓库。 - 语法: - CREATE [READ ONLY] REPOSITORY `repo_name` - WITH [BROKER `broker_name`|S3] - ON LOCATION `repo_location` - PROPERTIES ("key"="value", ...); - - 说明: - 1. 仓库的创建,依赖于已存在的 broker 或者直接通过AWS s3 协议访问云存储 - 2. 如果是只读仓库,则只能在仓库上进行恢复。如果不是,则可以进行备份和恢复操作。 - 3. 根据 broker 或者S3的不同类型,PROPERTIES 有所不同,具体见示例。 - -## example - 1. 创建名为 bos_repo 的仓库,依赖 BOS broker "bos_broker",数据根目录为:bos://palo_backup - CREATE REPOSITORY `bos_repo` - WITH BROKER `bos_broker` - ON LOCATION "bos://palo_backup" - PROPERTIES - ( - "bos_endpoint" = "http://gz.bcebos.com", - "bos_accesskey" = "bos_accesskey", - "bos_secret_accesskey"="bos_secret_accesskey" - ); - - 2. 创建和示例 1 相同的仓库,但属性为只读: - CREATE READ ONLY REPOSITORY `bos_repo` - WITH BROKER `bos_broker` - ON LOCATION "bos://palo_backup" - PROPERTIES - ( - "bos_endpoint" = "http://gz.bcebos.com", - "bos_accesskey" = "bos_accesskey", - "bos_secret_accesskey"="bos_accesskey" - ); - - 3. 创建名为 hdfs_repo 的仓库,依赖 Baidu hdfs broker "hdfs_broker",数据根目录为:hdfs://hadoop-name-node:54310/path/to/repo/ - CREATE REPOSITORY `hdfs_repo` - WITH BROKER `hdfs_broker` - ON LOCATION "hdfs://hadoop-name-node:54310/path/to/repo/" - PROPERTIES - ( - "username" = "user", - "password" = "password" - ); - - 4. 创建名为 s3_repo 的仓库,直接链接云存储,而不通过broker. 
- CREATE REPOSITORY `s3_repo` - WITH S3 - ON LOCATION "s3://s3-repo" - PROPERTIES - ( - "AWS_ENDPOINT" = "http://s3-REGION.amazonaws.com", - "AWS_ACCESS_KEY" = "AWS_ACCESS_KEY", - "AWS_SECRET_KEY"="AWS_SECRET_KEY", - "AWS_REGION" = "REGION" - ); - -## keyword - CREATE, REPOSITORY - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md deleted file mode 100644 index 4045fb202c..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE RESOURCE.md +++ /dev/null @@ -1,132 +0,0 @@ ---- -{ - "title": "CREATE RESOURCE", - "language": "zh-CN" -} ---- - - - -# CREATE RESOURCE - -## Description - - 该语句用于创建资源。仅 root 或 admin 用户可以创建资源。目前支持 Spark, ODBC, S3 外部资源。 - 将来其他外部资源可能会加入到 Doris 中使用,如 Spark/GPU 用于查询,HDFS/S3 用于外部存储,MapReduce 用于 ETL 等。 - 语法: - CREATE [EXTERNAL] RESOURCE "resource_name" - PROPERTIES ("key"="value", ...); - - 说明: - 1. PROPERTIES中需要指定资源的类型 "type" = "[spark|odbc_catalog|s3]",目前支持 spark, odbc_catalog, s3。 - 2. 根据资源类型的不同 PROPERTIES 有所不同,具体见示例。 - -## Example - - 1. 创建yarn cluster 模式,名为 spark0 的 Spark 资源。 - - ``` - CREATE EXTERNAL RESOURCE "spark0" - PROPERTIES - ( - "type" = "spark", - "spark.master" = "yarn", - "spark.submit.deployMode" = "cluster", - "spark.jars" = "xxx.jar,yyy.jar", - "spark.files" = "/tmp/aaa,/tmp/bbb", - "spark.executor.memory" = "1g", - "spark.yarn.queue" = "queue0", - "spark.hadoop.yarn.resourcemanager.address" = "127.0.0.1:9999", - "spark.hadoop.fs.defaultFS" = "hdfs://127.0.0.1:10000", - "working_dir" = "hdfs://127.0.0.1:10000/tmp/doris", - "broker" = "broker0", - "broker.username" = "user0", - "broker.password" = "password0" - ); - ``` - - Spark 相关参数如下: - - spark.master: 必填,目前支持yarn,spark://host:port。 - - spark.submit.deployMode: Spark 程序的部署模式,必填,支持 cluster,client 两种。 - - spark.hadoop.yarn.resourcemanager.address: master为yarn时必填。 - - spark.hadoop.fs.defaultFS: master为yarn时必填。 - - 其他参数为可选,参考http://spark.apache.org/docs/latest/configuration.html - - Spark 用于 ETL 时需要指定 working_dir 和 broker。说明如下: - working_dir: ETL 使用的目录。spark作为ETL资源使用时必填。例如:hdfs://host:port/tmp/doris。 - broker: broker 名字。spark作为ETL资源使用时必填。需要使用`ALTER SYSTEM ADD BROKER` 命令提前完成配置。 - broker.property_key: broker读取ETL生成的中间文件时需要指定的认证信息等。 - - 2. 创建 ODBC resource - - ``` - CREATE EXTERNAL RESOURCE `oracle_odbc` - PROPERTIES ( - "type" = "odbc_catalog", - "host" = "192.168.0.1", - "port" = "8086", - "user" = "test", - "password" = "test", - "database" = "test", - "odbc_type" = "oracle", - "driver" = "Oracle 19 ODBC driver" - ); - ``` - - ODBC 的相关参数如下: - - hosts:外表数据库的IP地址 - - driver:ODBC外表的Driver名,该名字需要和be/conf/odbcinst.ini中的Driver名一致。 - - odbc_type:外表数据库的类型,当前支持oracle, mysql, postgresql - - user:外表数据库的用户名 - - password:对应用户的密码信息 - - 3. 
创建 S3 resource - - ``` - CREATE RESOURCE "remote_s3" - PROPERTIES - ( - "type" = "s3", - "s3_endpoint" = "http://bj.s3.com", - "s3_region" = "bj", - "s3_root_path" = "/path/to/root", - "s3_access_key" = "bbb", - "s3_secret_key" = "aaaa", - "s3_max_connections" = "50", - "s3_request_timeout_ms" = "3000", - "s3_connection_timeout_ms" = "1000" - ); - ``` - - S3 相关参数如下: - - 必需参数 - - s3_endpoint:s3 endpoint - - s3_region:s3 region - - s3_root_path:s3 根目录 - - s3_access_key:s3 access key - - s3_secret_key:s3 secret key - - 可选参数 - - s3_max_connections:s3 最大连接数量,默认为 50 - - s3_request_timeout_ms:s3 请求超时时间,单位毫秒,默认为 3000 - - s3_connection_timeout_ms:s3 连接超时时间,单位毫秒,默认为 1000 - -## keyword - - CREATE, RESOURCE diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md deleted file mode 100644 index 89c70f03a3..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE TABLE LIKE.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -{ - "title": "CREATE TABLE LIKE", - "language": "zh-CN" -} ---- - - - -# CREATE TABLE LIKE - -## description - -该语句用于创建一个表结构和另一张表完全相同的空表,同时也能够可选复制一些rollup。 -语法: - -``` - CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database.]table_name LIKE [database.]table_name [WITH ROLLUP (r1,r2,r3,...)] -``` - -说明: - 1. 复制的表结构包括Column Definition、Partitions、Table Properties等 - 2. 用户需要对复制的原表有`SELECT`权限 - 3. 支持复制MySQL等外表 - 4. 支持复制OLAP Table的rollup - -## Example - 1. 在test1库下创建一张表结构和table1相同的空表,表名为table2 - - CREATE TABLE test1.table2 LIKE test1.table1 - - 2. 在test2库下创建一张表结构和test1.table1相同的空表,表名为table2 - - CREATE TABLE test2.table2 LIKE test1.table1 - - 3. 在test1库下创建一张表结构和table1相同的空表,表名为table2,同时复制table1的r1,r2两个rollup - - CREATE TABLE test1.table2 LIKE test1.table1 WITH ROLLUP (r1,r2) - - 4. 在test1库下创建一张表结构和table1相同的空表,表名为table2,同时复制table1的所有rollup - - CREATE TABLE test1.table2 LIKE test1.table1 WITH ROLLUP - - 5. 在test2库下创建一张表结构和test1.table1相同的空表,表名为table2,同时复制table1的r1,r2两个rollup - - CREATE TABLE test2.table2 LIKE test1.table1 WITH ROLLUP (r1,r2) - - 6. 在test2库下创建一张表结构和test1.table1相同的空表,表名为table2,同时复制table1的所有rollup - - CREATE TABLE test2.table2 LIKE test1.table1 WITH ROLLUP - - 7. 在test1库下创建一张表结构和MySQL外表table1相同的空表,表名为table2 - - CREATE TABLE test1.table2 LIKE test1.table1 - -## keyword - -``` - CREATE,TABLE,LIKE - -``` diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE TABLE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE TABLE.md deleted file mode 100644 index ea39f15de6..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE TABLE.md +++ /dev/null @@ -1,912 +0,0 @@ ---- -{ - "title": "CREATE TABLE", - "language": "zh-CN" -} ---- - - - -# CREATE TABLE - -## description - -该语句用于创建 table。 -语法: - -``` - CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [database.]table_name - (column_definition1[, column_definition2, ...] - [, index_definition1[, index_definition2, ...]]) - [ENGINE = [olap|mysql|broker|hive|iceberg]] - [key_desc] - [COMMENT "table comment"]; - [partition_desc] - [distribution_desc] - [rollup_index] - [PROPERTIES ("key"="value", ...)] - [BROKER PROPERTIES ("key"="value", ...)] -``` - -1. 
column_definition - 语法: - `col_name col_type [agg_type] [NULL | NOT NULL] [DEFAULT "default_value"]` - - 说明: - col_name:列名称 - col_type:列类型 - - ``` - BOOLEAN(1字节) - 范围:{0,1} - TINYINT(1字节) - 范围:-2^7 + 1 ~ 2^7 - 1 - SMALLINT(2字节) - 范围:-2^15 + 1 ~ 2^15 - 1 - INT(4字节) - 范围:-2^31 + 1 ~ 2^31 - 1 - BIGINT(8字节) - 范围:-2^63 + 1 ~ 2^63 - 1 - LARGEINT(16字节) - 范围:-2^127 + 1 ~ 2^127 - 1 - FLOAT(4字节) - 支持科学计数法 - DOUBLE(8字节) - 支持科学计数法 - DECIMAL[(precision, scale)] (16字节) - 保证精度的小数类型。默认是 DECIMAL(10, 0) - precision: 1 ~ 27 - scale: 0 ~ 9 - 其中整数部分为 1 ~ 18 - 不支持科学计数法 - DATE(3字节) - 范围:0000-01-01 ~ 9999-12-31 - DATETIME(8字节) - 范围:0000-01-01 00:00:00 ~ 9999-12-31 23:59:59 - CHAR[(length)] - 定长字符串。长度范围:1 ~ 255。默认为1 - VARCHAR[(length)] - 变长字符串。长度范围:1 ~ 65533 - HLL (1~16385个字节) - hll列类型,不需要指定长度和默认值、长度根据数据的聚合 - 程度系统内控制,并且HLL列只能通过配套的hll_union_agg、Hll_cardinality、hll_hash进行查询或使用 - BITMAP - bitmap列类型,不需要指定长度和默认值。表示整型的集合,元素最大支持到2^64 - 1 - QUANTILE_STATE - QUANTILE_STATE列类型,不需要指定长度和默认值,表示分位数预聚合结果。目前仅支持原始数据为数值类型如:TINYINT、INT、FLOAT、DOUBLE、DECIMAL。当元素个数小于2048时存储明细数据,当元素个数大于2048时存储 [TDigest](https://github.com/tdunning/t-digest/blob/main/docs/t-digest-paper/histo.pdf) 算法预聚合的中间结果 - ``` - - agg_type:聚合类型,如果不指定,则该列为 key 列。否则,该列为 value 列 - - * SUM、MAX、MIN、REPLACE - * HLL_UNION(仅用于HLL列,为HLL独有的聚合方式)、 - * BITMAP_UNION(仅用于 BITMAP 列,为 BITMAP 独有的聚合方式)、 - * QUANTILE_UNION(仅用于 QUANTILE_STATE 列,为 QUANTILE_STATE 独有的聚合方式) - * REPLACE_IF_NOT_NULL:这个聚合类型的含义是当且仅当新导入数据是非NULL值时会发生替换行为,如果新导入的数据是NULL,那么Doris仍然会保留原值。注意:如果用在建表时REPLACE_IF_NOT_NULL列指定了NOT NULL,那么Doris仍然会将其转化NULL,不会向用户报错。用户可以借助这个类型完成部分列导入的功能。**这里要注意的是字段默认值要给NULL,而不能是空字符串,如果是空字符串,会给你替换成空字符串**。 - * 该类型只对聚合模型(key_desc的type为AGGREGATE KEY)有用,其它模型不需要指这个。 - - 是否允许为NULL: 默认允许为 NULL。NULL 值在导入数据中用 \N 来表示 - - 注意: - - BITMAP_UNION聚合类型列在导入时的原始数据类型必须是TINYINT,SMALLINT,INT,BIGINT。 - - QUANTILE_UNION聚合类型列在导入时的原始数据类型必须是数值类型如:TINYINT、INT、FLOAT、DOUBLE、DECIMAL - -2. index_definition - 语法: - `INDEX index_name (col_name[, col_name, ...]) [USING BITMAP] COMMENT 'xxxxxx'` - 说明: - index_name:索引名称 - col_name:列名 - 注意: - 当前仅支持BITMAP索引, BITMAP索引仅支持应用于单列 - -3. 
ENGINE 类型 - 默认为 olap。可选 mysql, broker, hive, iceberg - 1) 如果是 mysql,则需要在 properties 提供以下信息: - -``` - PROPERTIES ( - "host" = "mysql_server_host", - "port" = "mysql_server_port", - "user" = "your_user_name", - "password" = "your_password", - "database" = "database_name", - "table" = "table_name" - ) -``` - - 注意: - "table" 条目中的 "table_name" 是 mysql 中的真实表名。 - 而 CREATE TABLE 语句中的 table_name 是该 mysql 表在 Doris 中的名字,可以不同。 - - 在 Doris 创建 mysql 表的目的是可以通过 Doris 访问 mysql 数据库。 - 而 Doris 本身并不维护、存储任何 mysql 数据。 - 2) 如果是 broker,表示表的访问需要通过指定的broker, 需要在 properties 提供以下信息: - ``` - PROPERTIES ( - "broker_name" = "broker_name", - "path" = "file_path1[,file_path2]", - "column_separator" = "value_separator" - "line_delimiter" = "value_delimiter" - ) - ``` - 另外还需要提供Broker需要的Property信息,通过BROKER PROPERTIES来传递,例如HDFS需要传入 - ``` - BROKER PROPERTIES( - "username" = "name", - "password" = "password" - ) - ``` - 这个根据不同的Broker类型,需要传入的内容也不相同 - 注意: - "path" 中如果有多个文件,用逗号[,]分割。如果文件名中包含逗号,那么使用 %2c 来替代。如果文件名中包含 %,使用 %25 代替 - 现在文件内容格式支持CSV,支持GZ,BZ2,LZ4,LZO(LZOP) 压缩格式。 - - 3) 如果是 hive,则需要在 properties 提供以下信息: - ``` - PROPERTIES ( - "database" = "hive_db_name", - "table" = "hive_table_name", - "hive.metastore.uris" = "thrift://127.0.0.1:9083" - ) - - ``` - 其中 database 是 hive 表对应的库名字,table 是 hive 表的名字,hive.metastore.uris 是 hive metastore 服务地址。 - - 4)如果是 iceberg,则需要在 properties 中提供以下信息: - ``` - PROPERTIES ( - "iceberg.database" = "iceberg_db_name", - "iceberg.table" = "iceberg_table_name", - "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ) - - ``` - 其中 database 是 Iceberg 对应的库名; - table 是 Iceberg 中对应的表名; - hive.metastore.uris 是 hive metastore 服务地址; - catalog.type 默认为 HIVE_CATALOG。当前仅支持 HIVE_CATALOG,后续会支持更多 Iceberg catalog 类型。 - - -4. key_desc - 语法: - `key_type(k1[,k2 ...])` - 说明: - 数据按照指定的key列进行排序,且根据不同的key_type具有不同特性。 - key_type支持以下类型: - AGGREGATE KEY:key列相同的记录,value列按照指定的聚合类型进行聚合, - 适合报表、多维分析等业务场景。 - UNIQUE KEY:key列相同的记录,value列按导入顺序进行覆盖, - 适合按key列进行增删改查的点查询业务。 - DUPLICATE KEY:key列相同的记录,同时存在于Doris中, - 适合存储明细数据或者数据无聚合特性的业务场景。 - 默认为DUPLICATE KEY,key列为列定义中前36个字节, 如果前36个字节的列数小于3,将使用前三列。 - 注意: - 除AGGREGATE KEY外,其他key_type在建表时,value列不需要指定聚合类型。 - -5. partition_desc - 目前支持 RANGE 和 LIST 两种分区方式。 - 5.1 RANGE 分区 - RANGE partition描述有两种使用方式 - 1) LESS THAN - 语法: - - ``` - PARTITION BY RANGE (k1, k2, ...) - ( - PARTITION partition_name1 VALUES LESS THAN MAXVALUE|("value1", "value2", ...), - PARTITION partition_name2 VALUES LESS THAN MAXVALUE|("value1", "value2", ...) - ... - ) - ``` - - 说明: - 使用指定的 key 列和指定的数值范围进行分区。 - 1) 分区名称仅支持字母开头,字母、数字和下划线组成 - 2) 目前仅支持以下类型的列作为 Range 分区列 - TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME - 3) 分区为左闭右开区间,首个分区的左边界为做最小值 - 4) NULL 值只会存放在包含最小值的分区中。当包含最小值的分区被删除后,NULL 值将无法导入。 - 5) 可以指定一列或多列作为分区列。如果分区值缺省,则会默认填充最小值。 - - 注意: - 1) 分区一般用于时间维度的数据管理 - 2) 有数据回溯需求的,可以考虑首个分区为空分区,以便后续增加分区 - - 2)Fixed Range - 语法: - ``` - PARTITION BY RANGE (k1, k2, k3, ...) - ( - PARTITION partition_name1 VALUES [("k1-lower1", "k2-lower1", "k3-lower1",...), ("k1-upper1", "k2-upper1", "k3-upper1", ...)), - PARTITION partition_name2 VALUES [("k1-lower1-2", "k2-lower1-2", ...), ("k1-upper1-2", MAXVALUE, )) - "k3-upper1-2", ... - ) - ``` - 说明: - 1)Fixed Range比LESS THAN相对灵活些,左右区间完全由用户自己确定 - 2)其他与LESS THAN保持同步 - - 5.2 LIST 分区 - LIST partition分为单列分区和多列分区 - 1) 单列分区 - 语法: - - ``` - PARTITION BY LIST(k1) - ( - PARTITION partition_name1 VALUES IN ("value1", "value2", ...), - PARTITION partition_name2 VALUES IN ("value1", "value2", ...) - ... 
- ) - ``` - - 说明: - 使用指定的 key 列和制定的枚举值进行分区。 - 1) 分区名称仅支持字母开头,字母、数字和下划线组成 - 2) 目前仅支持以下类型的列作为 List 分区列 - BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME, CHAR, VARCHAR - 3) 分区为枚举值集合,各个分区之间分区值不能重复 - 4) 不可导入 NULL 值 - 5) 分区值不能缺省,必须指定至少一个 - - 2) 多列分区 - 语法: - - ``` - PARTITION BY LIST(k1, k2) - ( - PARTITION partition_name1 VALUES IN (("value1", "value2"), ("value1", "value2"), ...), - PARTITION partition_name2 VALUES IN (("value1", "value2"), ("value1", "value2"), ...) - ... - ) - ``` - - 说明: - 1) 多列分区的分区是元组枚举值的集合 - 2) 每个元组值的个数必须与分区列个数相等 - 3) 其他与单列分区保持同步 - -6. distribution_desc - 1) Hash 分桶 - 语法: - `DISTRIBUTED BY HASH (k1[,k2 ...]) [BUCKETS num]` - 说明: - 使用指定的 key 列进行哈希分桶。 - 2) Random 分桶 - 语法: - `DISTRIBUTED BY RANDOM [BUCKETS num]` - 说明: - 使用随机数进行分桶。 - 建议: 当没有合适的key做哈希分桶使得表的数据均匀分布的时候,建议使用RANDOM分桶方式。 - -7. PROPERTIES - 1) 如果 ENGINE 类型为 olap - 可以在 properties 设置该表数据的初始存储介质、存储到期时间和副本数。 - - ``` - PROPERTIES ( - "storage_medium" = "[SSD|HDD]", - ["storage_cooldown_time" = "yyyy-MM-dd HH:mm:ss"], - ["remote_storage_resource" = "xxx"], - ["remote_storage_cooldown_time" = "yyyy-MM-dd HH:mm:ss"], - ["replication_num" = "3"] - ["replication_allocation" = "xxx"] - ) - ``` - - storage_medium: 于指定该分区的初始存储介质,可选择 SSD 或 HDD。默认初始存储介质可通过fe的配置文件 `fe.conf` 中指定 `default_storage_medium=xxx`,如果没有指定,则默认为 HDD。 - 注意:当FE配置项 `enable_strict_storage_medium_check` 为 `True` 时,若集群中没有设置对应的存储介质时,建表语句会报错 `Failed to find enough host in all backends with storage medium is SSD|HDD`. - storage_cooldown_time: 设置存储介质为 SSD 时,指定该分区在 SSD 上的存储到期时间。 - 默认存放 30 天。 - 格式为:"yyyy-MM-dd HH:mm:ss" - remote_storage_resource: 远端存储资源名称,需要与 remote_storage_cooldown_time 参数搭配使用。 - remote_storage_cooldown_time: 与 remote_storage_resource 搭配使用。表示该分区在本地存储的到期时间。 - 默认不过期。如果与 storage_cooldown_time 搭配使用必须晚于该时间。 - 格式为:"yyyy-MM-dd HH:mm:ss" - replication_num: 指定分区的副本数。默认为 3。 - replication_allocation: 按照资源标签来指定副本分布。 - - 当表为单分区表时,这些属性为表的属性。 - 当表为两级分区时,这些属性为附属于每一个分区。 - 如果希望不同分区有不同属性。可以通过 ADD PARTITION 或 MODIFY PARTITION 进行操作 - - 2) 如果 Engine 类型为 olap, 可以指定某列使用 bloom filter 索引 - bloom filter 索引仅适用于查询条件为 in 和 equal 的情况,该列的值越分散效果越好 - 目前只支持以下情况的列:除了 TINYINT FLOAT DOUBLE 类型以外的 key 列及聚合方法为 REPLACE 的 value 列 - - ``` - PROPERTIES ( - "bloom_filter_columns"="k1,k2,k3" - ) - ``` - - 3) 如果希望使用 Colocate Join 特性,需要在 properties 中指定 - - ``` - PROPERTIES ( - "colocate_with"="table1" - ) - ``` - - 4) 如果希望使用动态分区特性,需要在properties 中指定。注意:动态分区只支持 RANGE 分区 - - ``` - PROPERTIES ( - "dynamic_partition.enable" = "true|false", - "dynamic_partition.time_unit" = "HOUR|DAY|WEEK|MONTH", - "dynamic_partition.start" = "${integer_value}", - "dynamic_partition.end" = "${integer_value}", - "dynamic_partition.prefix" = "${string_value}", - "dynamic_partition.buckets" = "${integer_value} - ``` - dynamic_partition.enable: 用于指定表级别的动态分区功能是否开启。默认为 true。 - dynamic_partition.time_unit: 用于指定动态添加分区的时间单位,可选择为HOUR(小时),DAY(天),WEEK(周),MONTH(月)。 - 注意:以小时为单位的分区列,数据类型不能为 DATE。 - dynamic_partition.start: 用于指定向前删除多少个分区。值必须小于0。默认为 Integer.MIN_VALUE。 - dynamic_partition.end: 用于指定提前创建的分区数量。值必须大于0。 - dynamic_partition.prefix: 用于指定创建的分区名前缀,例如分区名前缀为p,则自动创建分区名为p20200108 - dynamic_partition.buckets: 用于指定自动创建的分区分桶数量 - dynamic_partition.create_history_partition: 用于创建历史分区功能是否开启。默认为 false。 - dynamic_partition.history_partition_num: 当开启创建历史分区功能时,用于指定创建历史分区数量。 - dynamic_partition.reserved_history_periods: 用于指定保留的历史分区的时间段。 - - 5) 建表时可以批量创建多个 Rollup - 语法: - ``` - ROLLUP (rollup_name (column_name1, column_name2, ...) - [FROM from_index_name] - [PROPERTIES ("key"="value", ...)],...) 
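    -- Editor's illustrative sketch: two rollups declared in one CREATE TABLE statement,
    -- reusing the column names from the rollup_index_table example later in this document, e.g.
    -- ROLLUP (r1(event_day, siteid), r2(event_day, citycode))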
- ``` - - 6) 如果希望使用 内存表 特性,需要在 properties 中指定 - - ``` - PROPERTIES ( - "in_memory"="true" - ) - ``` - 当 in_memory 属性为 true 时,Doris会尽可能将该表的数据和索引Cache到BE 内存中 - - 7) 创建UNIQUE_KEYS表时,可以指定一个sequence列,当KEY列相同时,将按照sequence列进行REPLACE(较大值替换较小值,否则无法替换) - - ``` - PROPERTIES ( - "function_column.sequence_type" = 'Date', - ); - ``` - sequence_type用来指定sequence列的类型,可以为整型和时间类型 -## example - -1. 创建一个 olap 表,使用 HASH 分桶,使用列存,相同key的记录进行聚合 - - ``` - CREATE TABLE example_db.table_hash - ( - k1 BOOLEAN, - k2 TINYINT, - k3 DECIMAL(10, 2) DEFAULT "10.5", - v1 CHAR(10) REPLACE, - v2 INT SUM - ) - ENGINE=olap - AGGREGATE KEY(k1, k2, k3) - COMMENT "my first doris table" - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` - -2. 创建一个 olap 表,使用 Hash 分桶,使用列存,相同key的记录进行覆盖, - 设置初始存储介质和冷却时间 - - ``` - CREATE TABLE example_db.table_hash - ( - k1 BIGINT, - k2 LARGEINT, - v1 VARCHAR(2048) REPLACE, - v2 SMALLINT SUM DEFAULT "10" - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH (k1, k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", - "storage_cooldown_time" = "2015-06-04 00:00:00" - ); - ``` -3. 创建一个 olap 表,使用 Hash 分桶,使用列存,相同key的记录进行覆盖,设置初始存储介质和冷却时间 - 设置远端存储和冷数据存储介质 - - ``` - CREATE TABLE example_db.table_hash - ( - k1 BIGINT, - k2 LARGEINT, - v1 VARCHAR(2048) REPLACE, - v2 SMALLINT SUM DEFAULT "10" - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH (k1, k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", - "storage_cooldown_time" = "2015-06-04 00:00:00", - "remote_storage_resource" = "remote_s3", - "remote_storage_cooldown_time" = "2015-12-04 00:00:00" - ); - ``` - -4. 创建一个 olap 表,使用 Range 分区,使用Hash分桶,默认使用列存, - 相同key的记录同时存在,设置初始存储介质和冷却时间 - - 1)LESS THAN - - ``` - CREATE TABLE example_db.table_range - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1) - ( - PARTITION p1 VALUES LESS THAN ("2014-01-01"), - PARTITION p2 VALUES LESS THAN ("2014-06-01"), - PARTITION p3 VALUES LESS THAN ("2014-12-01") - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", "storage_cooldown_time" = "2015-06-04 00:00:00" - ); - ``` - - 说明: - 这个语句会将数据划分成如下3个分区: - - ``` - ( { MIN }, {"2014-01-01"} ) - [ {"2014-01-01"}, {"2014-06-01"} ) - [ {"2014-06-01"}, {"2014-12-01"} ) - ``` - - 不在这些分区范围内的数据将视为非法数据被过滤 - - 2) Fixed Range - - ``` - CREATE TABLE table_range - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1, k2, k3) - ( - PARTITION p1 VALUES [("2014-01-01", "10", "200"), ("2014-01-01", "20", "300")), - PARTITION p2 VALUES [("2014-06-01", "100", "200"), ("2014-07-01", "100", "300")) - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD" - ); - ``` - -5. 
创建一个 olap 表,使用 List 分区,使用Hash分桶,默认使用列存, - 相同key的记录同时存在,设置初始存储介质和冷却时间 - - 1)单列分区 - - ``` - CREATE TABLE example_db.table_list - ( - k1 INT, - k2 VARCHAR(128), - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY LIST (k1) - ( - PARTITION p1 VALUES IN ("1", "2", "3"), - PARTITION p2 VALUES IN ("4", "5", "6"), - PARTITION p3 VALUES IN ("7", "8", "9") - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", "storage_cooldown_time" = "2022-06-04 00:00:00" - ); - ``` - - 说明: - 这个语句会将数据划分成如下3个分区: - - ``` - ("1", "2", "3") - ("4", "5", "6") - ("7", "8", "9") - ``` - - 不在这些分区枚举值内的数据将视为非法数据被过滤 - - 2) 多列分区 - - ``` - CREATE TABLE example_db.table_list - ( - k1 INT, - k2 VARCHAR(128), - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY LIST (k1, k2) - ( - PARTITION p1 VALUES IN (("1","beijing"), ("1", "shanghai")), - PARTITION p2 VALUES IN (("2","beijing"), ("2", "shanghai")), - PARTITION p3 VALUES IN (("3","beijing"), ("3", "shanghai")) - ) - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", "storage_cooldown_time" = "2022-06-04 00:00:00" - ); - ``` - - 说明: - 这个语句会将数据划分成如下3个分区: - - ``` - (("1","beijing"), ("1", "shanghai")) - (("2","beijing"), ("2", "shanghai")) - (("3","beijing"), ("3", "shanghai")) - ``` - - 不在这些分区枚举值内的数据将视为非法数据被过滤 - -6. 创建一个 mysql 表 - - 6.1 直接通过外表信息创建mysql表 - ``` - CREATE EXTERNAL TABLE example_db.table_mysql - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - k4 VARCHAR(2048), - k5 DATETIME - ) - ENGINE=mysql - PROPERTIES - ( - "host" = "127.0.0.1", - "port" = "8239", - "user" = "mysql_user", - "password" = "mysql_passwd", - "database" = "mysql_db_test", - "table" = "mysql_table_test" - ) - ``` - - 6.2 通过External Catalog Resource创建mysql表 - ``` - CREATE EXTERNAL RESOURCE "mysql_resource" - PROPERTIES - ( - "type" = "odbc_catalog", - "user" = "mysql_user", - "password" = "mysql_passwd", - "host" = "127.0.0.1", - "port" = "8239" - ); - CREATE EXTERNAL TABLE example_db.table_mysql - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - k4 VARCHAR(2048), - k5 DATETIME - ) - ENGINE=mysql - PROPERTIES - ( - "odbc_catalog_resource" = "mysql_resource", - "database" = "mysql_db_test", - "table" = "mysql_table_test" - ) - ``` - -7. 创建一个数据文件存储在HDFS上的 broker 外部表, 数据使用 "|" 分割,"\n" 换行 - - ``` - CREATE EXTERNAL TABLE example_db.table_broker ( - k1 DATE, - k2 INT, - k3 SMALLINT, - k4 VARCHAR(2048), - k5 DATETIME - ) - ENGINE=broker - PROPERTIES ( - "broker_name" = "hdfs", - "path" = "hdfs://hdfs_host:hdfs_port/data1,hdfs://hdfs_host:hdfs_port/data2,hdfs://hdfs_host:hdfs_port/data3%2c4", - "column_separator" = "|", - "line_delimiter" = "\n" - ) - BROKER PROPERTIES ( - "username" = "hdfs_user", - "password" = "hdfs_password" - ) - ``` - -8. 创建一张含有HLL列的表 - - ``` - CREATE TABLE example_db.example_table - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 HLL HLL_UNION, - v2 HLL HLL_UNION - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` - -9. 创建一张含有BITMAP_UNION聚合类型的表(v1和v2列的原始数据类型必须是TINYINT,SMALLINT,INT) - - ``` - CREATE TABLE example_db.example_table - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 BITMAP BITMAP_UNION, - v2 BITMAP BITMAP_UNION - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` - -10. 
创建一张含有QUANTILE_UNION聚合类型的表(v1和v2列的原始数据类型必须是数值类型) - - ``` - CREATE TABLE example_db.example_table - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 QUANTILE_STATE QUANTILE_UNION, - v2 QUANTILE_STATE QUANTILE_UNION - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` - -11. 创建两张支持Colocate Join的表t1 和t2 - - ``` - CREATE TABLE `t1` ( - `id` int(11) COMMENT "", - `value` varchar(8) COMMENT "" - ) ENGINE=OLAP - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "t1" - ); - - CREATE TABLE `t2` ( - `id` int(11) COMMENT "", - `value` varchar(8) COMMENT "" - ) ENGINE=OLAP - DUPLICATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 10 - PROPERTIES ( - "colocate_with" = "t1" - ); - ``` - -12. 创建一个数据文件存储在BOS上的 broker 外部表 - - ``` - CREATE EXTERNAL TABLE example_db.table_broker ( - k1 DATE - ) - ENGINE=broker - PROPERTIES ( - "broker_name" = "bos", - "path" = "bos://my_bucket/input/file", - ) - BROKER PROPERTIES ( - "bos_endpoint" = "http://bj.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy" - ) - ``` - -13. 创建一个带有bitmap 索引的表 - - ``` - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 CHAR(10) REPLACE, - v2 INT SUM, - INDEX k1_idx (k1) USING BITMAP COMMENT 'xxxxxx' - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - COMMENT "my first doris table" - DISTRIBUTED BY HASH(k1) BUCKETS 32; - ``` - -14. 创建一个动态分区表(需要在FE配置中开启动态分区功能),该表每天提前创建3天的分区,并删除3天前的分区。例如今天为`2020-01-08`,则会创建分区名为`p20200108`, `p20200109`, `p20200110`, `p20200111`的分区. 分区范围分别为: - - ``` - [types: [DATE]; keys: [2020-01-08]; ‥types: [DATE]; keys: [2020-01-09]; ) - [types: [DATE]; keys: [2020-01-09]; ‥types: [DATE]; keys: [2020-01-10]; ) - [types: [DATE]; keys: [2020-01-10]; ‥types: [DATE]; keys: [2020-01-11]; ) - [types: [DATE]; keys: [2020-01-11]; ‥types: [DATE]; keys: [2020-01-12]; ) - ``` - - ``` - CREATE TABLE example_db.dynamic_partition - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - ENGINE=olap - DUPLICATE KEY(k1, k2, k3) - PARTITION BY RANGE (k1) () - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "storage_medium" = "SSD", - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-3", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32" - ); - ``` - -15. 创建一个带有rollup索引的表 - ``` - CREATE TABLE example_db.rollup_index_table - ( - event_day DATE, - siteid INT DEFAULT '10', - citycode SMALLINT, - username VARCHAR(32) DEFAULT '', - pv BIGINT SUM DEFAULT '0' - ) - AGGREGATE KEY(event_day, siteid, citycode, username) - DISTRIBUTED BY HASH(siteid) BUCKETS 10 - rollup ( - r1(event_day,siteid), - r2(event_day,citycode), - r3(event_day) - ) - PROPERTIES("replication_num" = "3"); - ``` -16. 创建一个内存表 - - ``` - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5", - v1 CHAR(10) REPLACE, - v2 INT SUM, - INDEX k1_idx (k1) USING BITMAP COMMENT 'xxxxxx' - ) - ENGINE=olap - AGGREGATE KEY(k1, k2) - COMMENT "my first doris table" - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ("in_memory"="true"); - ``` - -17. 创建一个hive外部表 - - ``` - CREATE TABLE example_db.table_hive - ( - k1 TINYINT, - k2 VARCHAR(50), - v INT - ) - ENGINE=hive - PROPERTIES - ( - "database" = "hive_db_name", - "table" = "hive_table_name", - "hive.metastore.uris" = "thrift://127.0.0.1:9083" - ); - ``` - -18. 
通过 replication_allocation 指定表的副本分布 - - ``` - CREATE TABLE example_db.table_hash - ( - k1 TINYINT, - k2 DECIMAL(10, 2) DEFAULT "10.5" - ) - DISTRIBUTED BY HASH(k1) BUCKETS 32 - PROPERTIES ( - "replication_allocation"="tag.location.group_a:1, tag.location.group_b:2" - ); - - - CREATE TABLE example_db.dynamic_partition - ( - k1 DATE, - k2 INT, - k3 SMALLINT, - v1 VARCHAR(2048), - v2 DATETIME DEFAULT "2014-02-04 15:36:00" - ) - PARTITION BY RANGE (k1) () - DISTRIBUTED BY HASH(k2) BUCKETS 32 - PROPERTIES( - "dynamic_partition.time_unit" = "DAY", - "dynamic_partition.start" = "-3", - "dynamic_partition.end" = "3", - "dynamic_partition.prefix" = "p", - "dynamic_partition.buckets" = "32", - "dynamic_partition."replication_allocation" = "tag.location.group_a:3" - ); - ``` - -19. 创建一个 Iceberg 外表 - - ``` - CREATE TABLE example_db.t_iceberg - ENGINE=ICEBERG - PROPERTIES ( - "iceberg.database" = "iceberg_db", - "iceberg.table" = "iceberg_table", - "iceberg.hive.metastore.uris" = "thrift://127.0.0.1:9083", - "iceberg.catalog.type" = "HIVE_CATALOG" - ); - ``` - -## keyword - - CREATE,TABLE diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE VIEW.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE VIEW.md deleted file mode 100644 index 85ae07fb8a..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/CREATE VIEW.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -{ - "title": "CREATE VIEW", - "language": "zh-CN" -} ---- - - - -# CREATE VIEW -## description - 该语句用于创建一个逻辑视图 - 语法: - CREATE VIEW [IF NOT EXISTS] - [db_name.]view_name - (column1[ COMMENT "col comment"][, column2, ...]) - AS query_stmt - - 说明: - 1. 视图为逻辑视图,没有物理存储。所有在视图上的查询相当于在视图对应的子查询上进行。 - 2. query_stmt 为任意支持的 SQL - -## example - 1. 在 example_db 上创建视图 example_view - - CREATE VIEW example_db.example_view (k1, k2, k3, v1) - AS - SELECT c1 as k1, k2, k3, SUM(v1) FROM example_table - WHERE k1 = 20160112 GROUP BY k1,k2,k3; - - 2. 创建一个包含 comment 的 view - - CREATE VIEW example_db.example_view - ( - k1 COMMENT "first key", - k2 COMMENT "second key", - k3 COMMENT "third key", - v1 COMMENT "first value" - ) - COMMENT "my first view" - AS - SELECT c1 as k1, k2, k3, SUM(v1) FROM example_table - WHERE k1 = 20160112 GROUP BY k1,k2,k3; - -## keyword - CREATE,VIEW - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP DATABASE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP DATABASE.md deleted file mode 100644 index 99c0762d8b..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP DATABASE.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -{ - "title": "DROP DATABASE", - "language": "zh-CN" -} ---- - - - -# DROP DATABASE -## description - 该语句用于删除数据库(database) - 语法: - DROP DATABASE [IF EXISTS] db_name [FORCE]; - - 说明: - 1) 执行 DROP DATABASE 一段时间内,可以通过 RECOVER 语句恢复被删除的数据库。详见 RECOVER 语句 - 2) 如果执行 DROP DATABASE FORCE,则系统不会检查该数据库是否存在未完成的事务,数据库将直接被删除并且不能被恢复,一般不建议执行此操作 - -## example - 1. 
删除数据库 db_test - DROP DATABASE db_test; -## keyword - DROP,DATABASE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md deleted file mode 100644 index c0f078715c..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP ENCRYPTKEY.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -{ - "title": "DROP ENCRYPTKEY", - "language": "zh-CN" -} ---- - - - -# DROP ENCRYPTKEY - -## Description - -### Syntax - -``` -DROP ENCRYPTKEY key_name -``` - -### Parameters - -> `key_name`: 要删除密钥的名字, 可以包含数据库的名字。比如:`db1.my_key`。 - -删除一个自定义密钥。密钥的名字完全一致才能够被删除。 - -执行此命令需要用户拥有 `ADMIN` 权限。 - -## example - -1. 删除掉一个密钥 - -``` -DROP ENCRYPTKEY my_key; -``` - -## keyword - - DROP,ENCRYPTKEY diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP INDEX.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP INDEX.md deleted file mode 100644 index 67ded37023..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP INDEX.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -{ - "title": "DROP INDEX", - "language": "zh-CN" -} ---- - - - -# DROP INDEX - -## description - - 该语句用于从一个表中删除指定名称的索引,目前仅支持bitmap 索引 - 语法: - DROP INDEX [IF EXISTS] index_name ON [db_name.]table_name; - -## keyword - - DROP,INDEX diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md deleted file mode 100644 index 461e044b0b..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP MATERIALIZED VIEW.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -{ - "title": "DROP MATERIALIZED VIEW", - "language": "zh-CN" -} ---- - - - -# DROP MATERIALIZED VIEW - -## description - 该语句用于删除物化视图。同步语法 - -语法: - - ``` - DROP MATERIALIZED VIEW [IF EXISTS] mv_name ON table_name - ``` - -1. IF EXISTS - 如果物化视图不存在,不要抛出错误。如果不声明此关键字,物化视图不存在则报错。 - -2. mv_name - 待删除的物化视图的名称。必填项。 - -3. table_name - 待删除的物化视图所属的表名。必填项。 - -## example - -表结构为 - -``` -mysql> desc all_type_table all; -+----------------+-------+----------+------+-------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+----------------+-------+----------+------+-------+---------+-------+ -| all_type_table | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | false | N/A | NONE | -| | k3 | INT | Yes | false | N/A | NONE | -| | k4 | BIGINT | Yes | false | N/A | NONE | -| | k5 | LARGEINT | Yes | false | N/A | NONE | -| | k6 | FLOAT | Yes | false | N/A | NONE | -| | k7 | DOUBLE | Yes | false | N/A | NONE | -| | | | | | | | -| k1_sumk2 | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | false | N/A | SUM | -+----------------+-------+----------+------+-------+---------+-------+ -``` - -1. 
删除表 all_type_table 的名为 k1_sumk2 的物化视图 - - ``` - drop materialized view k1_sumk2 on all_type_table; - ``` - 物化视图被删除后的表结构 - - ``` - +----------------+-------+----------+------+-------+---------+-------+ -| IndexName | Field | Type | Null | Key | Default | Extra | -+----------------+-------+----------+------+-------+---------+-------+ -| all_type_table | k1 | TINYINT | Yes | true | N/A | | -| | k2 | SMALLINT | Yes | false | N/A | NONE | -| | k3 | INT | Yes | false | N/A | NONE | -| | k4 | BIGINT | Yes | false | N/A | NONE | -| | k5 | LARGEINT | Yes | false | N/A | NONE | -| | k6 | FLOAT | Yes | false | N/A | NONE | -| | k7 | DOUBLE | Yes | false | N/A | NONE | -+----------------+-------+----------+------+-------+---------+-------+ - ``` - -2. 删除表 all_type_table 中一个不存在的物化视图 - - ``` - drop materialized view k1_k2 on all_type_table; - ERROR 1064 (HY000): errCode = 2, detailMessage = Materialized view [k1_k2] does not exist in table [all_type_table] - ``` - 删除请求直接报错 - -3. 删除表 all_type_table 中的物化视图 k1_k2,不存在不报错。 - - ``` - drop materialized view if exists k1_k2 on all_type_table; -Query OK, 0 rows affected (0.00 sec) - ``` - - 存在则删除,不存在则不报错。 - -## keyword - DROP, MATERIALIZED, VIEW diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md deleted file mode 100644 index ad7cd862bb..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP REPOSITORY.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -{ - "title": "DROP REPOSITORY", - "language": "zh-CN" -} ---- - - - -# DROP REPOSITORY -## description - 该语句用于删除一个已创建的仓库。仅 root 或 superuser 用户可以删除仓库。 - 语法: - DROP REPOSITORY `repo_name`; - - 说明: - 1. 删除仓库,仅仅是删除该仓库在 Palo 中的映射,不会删除实际的仓库数据。删除后,可以再次通过指定相同的 broker 和 LOCATION 映射到该仓库。 - -## example - 1. 删除名为 bos_repo 的仓库: - DROP REPOSITORY `bos_repo`; - -## keyword - DROP, REPOSITORY - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md deleted file mode 100644 index 2f55b17bf6..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP RESOURCE.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "DROP RESOURCE", - "language": "zh-CN" -} ---- - - - -# DROP RESOURCE - -## Description - - 该语句用于删除一个已有的资源。仅 root 或 admin 用户可以删除资源。 - 语法: - DROP RESOURCE 'resource_name' - - 注意:正在使用的 ODBC/S3 资源无法删除。 - -## Example - - 1. 删除名为 spark0 的 Spark 资源: - DROP RESOURCE 'spark0'; - -## keyword - - DROP, RESOURCE diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP TABLE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP TABLE.md deleted file mode 100644 index 8e43bbab76..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP TABLE.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -{ - "title": "DROP TABLE", - "language": "zh-CN" -} ---- - - - -# DROP TABLE -## description - 该语句用于删除 table 。 - 语法: - DROP TABLE [IF EXISTS] [db_name.]table_name [FORCE]; - - 说明: - 1) 执行 DROP TABLE 一段时间内,可以通过 RECOVER 语句恢复被删除的表。详见 RECOVER 语句 - 2) 如果执行 DROP TABLE FORCE,则系统不会检查该表是否存在未完成的事务,表将直接被删除并且不能被恢复,一般不建议执行此操作 - -## example - 1. 删除一个 table - DROP TABLE my_table; - - 2. 
如果存在,删除指定 database 的 table - DROP TABLE IF EXISTS example_db.my_table; - -## keyword - DROP,TABLE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP VIEW.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP VIEW.md deleted file mode 100644 index dedb001986..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/DROP VIEW.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -{ - "title": "DROP VIEW", - "language": "zh-CN" -} ---- - - - -# DROP VIEW -## description - 该语句用于删除一个逻辑视图 VIEW - 语法: - DROP VIEW [IF EXISTS] - [db_name.]view_name; - -## example - 1. 如果存在,删除 example_db 上的视图 example_view - DROP VIEW IF EXISTS example_db.example_view; - -## keyword - DROP,VIEW - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/HLL.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/HLL.md deleted file mode 100644 index 599e08738f..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/HLL.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -{ - "title": "HLL", - "language": "zh-CN" -} ---- - - - -# HLL -## description - HLL是基于HyperLogLog算法的工程实现,用于保存HyperLogLog计算过程的中间结果,它只能作为表的value列类型 - 通过聚合来不断的减少数据量,以此来实现加快查询的目的,基于它到的是一个估算结果,误差大概在1%左右 - hll列是通过其它列或者导入数据里面的数据生成的,导入的时候通过hll_hash函数来指定数据中哪一列用于生成hll列 - 它常用于替代count distinct,通过结合rollup在业务上用于快速计算uv等 - - 相关函数: - - HLL_UNION_AGG(hll) - 此函数为聚合函数,用于计算满足条件的所有数据的基数估算。此函数还可用于分析函数,只支持默认窗口,不支持window从句。 - - HLL_RAW_AGG(hll) - 此函数为聚合函数,用于聚合hll类型字段,并且返回的还是hll类型。 - - HLL_CARDINALITY(hll) - 此函数用于计算单条hll列的基数估算 - - HLL_HASH(column_name) - 生成HLL列类型,用于insert或导入的时候,导入的使用见相关说明 - - EMPTY_HLL() - 生成空HLL列,用于insert或导入的时候补充默认值,导入的使用见相关说明 - -## example - 1. 首先创建一张含有hll列的表 - create table test( - dt date, - id int, - name char(10), - province char(10), - os char(1), - set1 hll hll_union, - set2 hll hll_union) - distributed by hash(id) buckets 32; - - 2. 导入数据,导入的方式见相关help curl - - a. 使用表中的列生成hll列 - curl --location-trusted -uname:password -T data -H "label:load_1" -H "columns:dt, id, name, province, os, set1=hll_hash(id), set2=hll_hash(name)" - http://host/api/test_db/test/_stream_load - b. 使用数据中的某一列生成hll列 - curl --location-trusted -uname:password -T data -H "label:load_1" -H "columns:dt, id, name, province, sex, cuid, os, set1=hll_hash(cuid), set2=hll_hash(os)" - http://host/api/test_db/test/_stream_load - - 3. 聚合数据,常用方式3种:(如果不聚合直接对base表查询,速度可能跟直接使用approx_count_distinct速度差不多) - - a. 创建一个rollup,让hll列产生聚合, - alter table test add rollup test_rollup(dt, set1); - - b. 创建另外一张专门计算uv的表,然后insert数据) - - create table test_uv( - dt date, - uv_set hll hll_union) - distributed by hash(dt) buckets 32; - - insert into test_uv select dt, set1 from test; - - c. 创建另外一张专门计算uv的表,然后insert并通过hll_hash根据test其它非hll列生成hll列 - - create table test_uv( - dt date, - id_set hll hll_union) - distributed by hash(dt) buckets 32; - - insert into test_uv select dt, hll_hash(id) from test; - - 4. 查询,hll列不允许直接查询它的原始值,可以通过配套的函数进行查询 - - a. 求总uv - select HLL_UNION_AGG(uv_set) from test_uv; - - b. 求每一天的uv - select dt, HLL_CARDINALITY(uv_set) from test_uv; - - c. 
求test表中set1的聚合值 - select dt, HLL_CARDINALITY(uv) from (select dt, HLL_RAW_AGG(set1) as uv from test group by dt) tmp; - select dt, HLL_UNION_AGG(set1) as uv from test group by dt; - -## keyword - HLL - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/RECOVER.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/RECOVER.md deleted file mode 100644 index 98cebc18be..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/RECOVER.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -{ - "title": "RECOVER", - "language": "zh-CN" -} ---- - - - -# RECOVER -## description - 该语句用于恢复之前删除的 database、table 或者 partition - 语法: - 1) 恢复 database - RECOVER DATABASE db_name; - 2) 恢复 table - RECOVER TABLE [db_name.]table_name; - 3) 恢复 partition - RECOVER PARTITION partition_name FROM [db_name.]table_name; - - 说明: - 1. 该操作仅能恢复之前一段时间内删除的元信息。默认为 1 天。(可通过fe.conf中`catalog_trash_expire_second`参数配置) - 2. 如果删除元信息后新建立了同名同类型的元信息,则之前删除的元信息不能被恢复 - -## example - 1. 恢复名为 example_db 的 database - RECOVER DATABASE example_db; - - 2. 恢复名为 example_tbl 的 table - RECOVER TABLE example_db.example_tbl; - - 3. 恢复表 example_tbl 中名为 p1 的 partition - RECOVER PARTITION p1 FROM example_tbl; - -## keyword - RECOVER - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md deleted file mode 100644 index b6c545a267..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/REFRESH DATABASE.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -{ - "title": "REFRESH DATABASE", - "language": "zh-CN" -} ---- - - - -# REFRESH DATABASE - -## Description - - 该语句用于同步远端 Iceberg 数据库,会将 Doris 当前数据库下的 Iceberg 外表删除重建,非 Iceberg 外表不受影响。 - 语法: - REFRESH DATABASE db_name; - - 说明: - 1) 仅针对 Doris 中挂载的 Iceberg 数据库有效。 - -## Example - - 1. 刷新数据库 iceberg_test_db - REFRESH DATABASE iceberg_test_db; - -## keyword - - REFRESH,DATABASE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md deleted file mode 100644 index 401ba50af7..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/REFRESH TABLE.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -{ - "title": "REFRESH TABLE", - "language": "zh-CN" -} ---- - - - -# REFRESH TABLE - -## Description - - 该语句用于同步远端 Iceberg 表,会将 Doris 当前的外表删除重建。 - 语法: - REFRESH TABLE tbl_name; - - 说明: - 1) 仅针对 Doris 中挂载的 Iceberg 表有效。 - -## Example - - 1. 刷新表 iceberg_tbl - REFRESH TABLE iceberg_tbl; - -## keyword - - REFRESH,TABLE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/RESTORE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/RESTORE.md deleted file mode 100644 index 131db9b9af..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/RESTORE.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -{ - "title": "RESTORE", - "language": "zh-CN" -} ---- - - - -# RESTORE -## description - 1. RESTORE - 该语句用于将之前通过 BACKUP 命令备份的数据,恢复到指定数据库下。该命令为异步操作。提交成功后,需通过 SHOW RESTORE 命令查看进度。仅支持恢复 OLAP 类型的表。 - 语法: - RESTORE SNAPSHOT [db_name].{snapshot_name} - FROM `repository_name` - [ON|EXCLUDE] ( - `table_name` [PARTITION (`p1`, ...)] [AS `tbl_alias`], - ... - ) - PROPERTIES ("key"="value", ...); - - 说明: - 1. 同一数据库下只能有一个正在执行的 BACKUP 或 RESTORE 任务。 - 2. ON 子句中标识需要恢复的表和分区。如果不指定分区,则默认恢复该表的所有分区。所指定的表和分区必须已存在于仓库备份中。 - 3. EXCLUDE 子句中标识不需要恢复的表和分区。除了所指定的表或分区之外仓库中所有其他表的所有分区将被恢复。 - 4. 可以通过 AS 语句将仓库中备份的表名恢复为新的表。但新表名不能已存在于数据库中。分区名称不能修改。 - 5. 
可以将仓库中备份的表恢复替换数据库中已有的同名表,但须保证两张表的表结构完全一致。表结构包括:表名、列、分区、Rollup等等。 - 6. 可以指定恢复表的部分分区,系统会检查分区 Range 或者 List 是否能够匹配。 - 7. PROPERTIES 目前支持以下属性: - "backup_timestamp" = "2018-05-04-16-45-08":指定了恢复对应备份的哪个时间版本,必填。该信息可以通过 `SHOW SNAPSHOT ON repo;` 语句获得。 - "replication_num" = "3":指定恢复的表或分区的副本数。默认为3。若恢复已存在的表或分区,则副本数必须和已存在表或分区的副本数相同。同时,必须有足够的 host 容纳多个副本。 - "timeout" = "3600":任务超时时间,默认为一天。单位秒。 - "meta_version" = 40:使用指定的 meta_version 来读取之前备份的元数据。注意,该参数作为临时方案,仅用于恢复老版本 Doris 备份的数据。最新版本的备份数据中已经包含 meta version,无需再指定。 - -## example - 1. 从 example_repo 中恢复备份 snapshot_1 中的表 backup_tbl 到数据库 example_db1,时间版本为 "2018-05-04-16-45-08"。恢复为 1 个副本: - RESTORE SNAPSHOT example_db1.`snapshot_1` - FROM `example_repo` - ON ( `backup_tbl` ) - PROPERTIES - ( - "backup_timestamp"="2018-05-04-16-45-08", - "replication_num" = "1" - ); - - 2. 从 example_repo 中恢复备份 snapshot_2 中的表 backup_tbl 的分区 p1,p2,以及表 backup_tbl2 到数据库 example_db1,并重命名为 new_tbl,时间版本为 "2018-05-04-17-11-01"。默认恢复为 3 个副本: - RESTORE SNAPSHOT example_db1.`snapshot_2` - FROM `example_repo` - ON - ( - `backup_tbl` PARTITION (`p1`, `p2`), - `backup_tbl2` AS `new_tbl` - ) - PROPERTIES - ( - "backup_timestamp"="2018-05-04-17-11-01" - ); - - 3. 从 example_repo 中恢复备份 snapshot_3 中除了表 backup_tbl 的其他所有表到数据库 example_db1,时间版本为 "2018-05-04-18-12-18"。 - RESTORE SNAPSHOT example_db1.`snapshot_3` - FROM `example_repo` - EXCLUDE ( `backup_tbl` ) - PROPERTIES - ( - "backup_timestamp"="2018-05-04-18-12-18" - ); - -## keyword - RESTORE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md deleted file mode 100644 index 54f09b2479..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/SHOW ENCRYPTKEYS.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -{ - "title": "SHOW ENCRYPTKEYS", - "language": "zh-CN" -} ---- - - - -# SHOW ENCRYPTKEYS - -## Description - -### Syntax - -``` -SHOW ENCRYPTKEYS [IN|FROM db] [LIKE 'key_pattern'] -``` - -### Parameters - ->`db`: 要查询的数据库名字 ->`key_pattern`: 用来过滤密钥名称的参数 - -查看数据库下所有的自定义的密钥。如果用户指定了数据库,那么查看对应数据库的,否则直接查询当前会话所在数据库。 - -需要对这个数据库拥有 `ADMIN` 权限 - -## Example - - ``` - mysql> SHOW ENCRYPTKEYS; - +-------------------+-------------------+ - | EncryptKey Name | EncryptKey String | - +-------------------+-------------------+ - | example_db.my_key | ABCD123456789 | - +-------------------+-------------------+ - 1 row in set (0.00 sec) - - mysql> SHOW ENCRYPTKEYS FROM example_db LIKE "%my%"; - +-------------------+-------------------+ - | EncryptKey Name | EncryptKey String | - +-------------------+-------------------+ - | example_db.my_key | ABCD123456789 | - +-------------------+-------------------+ - 1 row in set (0.00 sec) - ``` - -## keyword - - SHOW,ENCRYPTKEYS diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md deleted file mode 100644 index 97981e6424..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/SHOW RESOURCES.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -{ - "title": "SHOW RESOURCES", - "language": "zh-CN" -} ---- - - - -# SHOW RESOURCES - -## Description - - 该语句用于展示用户有使用权限的资源。普通用户仅能展示有使用权限的资源,root 或 admin 用户会展示所有的资源。 - - 语法: - - SHOW RESOURCES - [ - WHERE - [NAME [ = "your_resource_name" | LIKE "name_matcher"]] - [RESOURCETYPE = ["[spark|odbc_catalog|s3]"]] - ] - [ORDER BY ...] 
- [LIMIT limit][OFFSET offset]; - - 说明: - 1) 如果使用 NAME LIKE,则会匹配 RESOURCES 的 Name 包含 name_matcher 的 Resource - 2) 如果使用 NAME = ,则精确匹配指定的 Name - 3) 如果指定了 RESOURCETYPE,则匹配对应的 Resrouce 类型 - 4) 可以使用 ORDER BY 对任意列组合进行排序 - 5) 如果指定了 LIMIT,则显示 limit 条匹配记录。否则全部显示 - 6) 如果指定了 OFFSET,则从偏移量 offset 开始显示查询结果。默认情况下偏移量为 0。 - -## Example - - 1. 展示当前用户拥有权限的所有Resource - SHOW RESOURCES; - - 2. 展示指定 Resource ,NAME 中包含字符串 "20140102",展示10个属性 - SHOW RESOURCES WHERE NAME LIKE "2014_01_02" LIMIT 10; - - 3. 展示指定 Resource ,指定 NAME 为 "20140102" 并按 KEY 降序排序 - SHOW RESOURCES WHERE NAME = "20140102" ORDER BY `KEY` DESC; - - -## keyword - - SHOW RESOURCES, RESOURCES diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md deleted file mode 100644 index afb4919f6d..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/TRUNCATE TABLE.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -{ - "title": "TRUNCATE TABLE", - "language": "zh-CN" -} ---- - - - -# TRUNCATE TABLE -## description - 该语句用于清空指定表和分区的数据 - 语法: - - TRUNCATE TABLE [db.]tbl[ PARTITION(p1, p2, ...)]; - - 说明: - 1. 该语句清空数据,但保留表或分区。 - 2. 不同于 DELETE,该语句只能整体清空指定的表或分区,不能添加过滤条件。 - 3. 不同于 DELETE,使用该方式清空数据不会对查询性能造成影响。 - 4. 该操作删除的数据不可恢复。 - 5. 使用该命令时,表状态需为 NORMAL,即不允许正在进行 SCHEMA CHANGE 等操作。 - -## example - - 1. 清空 example_db 下的表 tbl - - TRUNCATE TABLE example_db.tbl; - - 2. 清空表 tbl 的 p1 和 p2 分区 - - TRUNCATE TABLE tbl PARTITION(p1, p2); - -## keyword - TRUNCATE,TABLE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/create-function.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/create-function.md deleted file mode 100644 index 902462664a..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/create-function.md +++ /dev/null @@ -1,153 +0,0 @@ ---- -{ - "title": "CREATE FUNCTION", - "language": "zh-CN" -} ---- - - - -# CREATE FUNCTION -## Description -### Syntax - -``` -CREATE [AGGREGATE] [ALIAS] FUNCTION function_name - (arg_type [, ...]) - [RETURNS ret_type] - [INTERMEDIATE inter_type] - [WITH PARAMETER(param [,...]) AS origin_function] - [PROPERTIES ("key" = "value" [, ...]) ] -``` - -### Parameters - -> `AGGREGATE`: 如果有此项,表示的是创建的函数是一个聚合函数。 -> -> `ALIAS`:如果有此项,表示的是创建的函数是一个别名函数。 -> -> 如果没有上述两项,表示创建的函数是一个标量函数 -> -> `function_name`: 要创建函数的名字, 可以包含数据库的名字。比如:`db1.my_func`。 -> -> `arg_type`: 函数的参数类型,与建表时定义的类型一致。变长参数时可以使用`, ...`来表示,如果是变长类型,那么变长部分参数的类型与最后一个非变长参数类型一致。 -> **注意**:`ALIAS FUNCTION` 不支持变长参数,且至少有一个参数。 特别地,`ALL` 类型指任一数据类型,只可以用于 `ALIAS FUNCTION`. 
-> -> `ret_type`: 对创建新的函数来说,是必填项。如果是给已有函数取别名则可不用填写该参数。 -> -> `inter_type`: 用于表示聚合函数中间阶段的数据类型。 -> -> `param`:用于表示别名函数的参数,至少包含一个。 -> -> `origin_function`:用于表示别名函数对应的原始函数。 -> -> `properties`: 用于设定聚合函数和标量函数相关属性,能够设置的属性包括 -> -> "object_file": 自定义函数动态库的URL路径,当前只支持 HTTP/HTTPS 协议,此路径需要在函数整个生命周期内保持有效。此选项为必选项 -> -> "symbol": 标量函数的函数签名,用于从动态库里面找到函数入口。此选项对于标量函数是必选项 -> -> "init_fn": 聚合函数的初始化函数签名。对于聚合函数是必选项 -> -> "update_fn": 聚合函数的更新函数签名。对于聚合函数是必选项 -> -> "merge_fn": 聚合函数的合并函数签名。对于聚合函数是必选项 -> -> "serialize_fn": 聚合函数的序列化函数签名。对于聚合函数是可选项,如果没有指定,那么将会使用默认的序列化函数 -> -> "finalize_fn": 聚合函数获取最后结果的函数签名。对于聚合函数是可选项,如果没有指定,将会使用默认的获取结果函数 -> -> "md5": 函数动态链接库的MD5值,用于校验下载的内容是否正确。此选项是可选项 -> -> "prepare_fn": 自定义函数的prepare函数的函数签名,用于从动态库里面找到prepare函数入口。此选项对于自定义函数是可选项 -> -> "close_fn": 自定义函数的close函数的函数签名,用于从动态库里面找到close函数入口。此选项对于自定义函数是可选项 -> "type": 自定义函数的类型,如果是远程函数就是则填 RPC,C++的原生 UDF 填 NATIVE, 默认 NATIVE - - -此语句创建一个自定义函数。执行此命令需要用户拥有 `ADMIN` 权限。 - -如果 `function_name` 中包含了数据库名字,那么这个自定义函数会创建在对应的数据库中,否则这个函数将会创建在当前会话所在的数据库。新函数的名字与参数不能够与当前命名空间中已存在的函数相同,否则会创建失败。但是只有名字相同,参数不同是能够创建成功的。 - -## example - -1. 创建一个自定义标量函数 - - ``` - CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES ( - "symbol" = "_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_", - "object_file" = "http://host:port/libmyadd.so" - ); - ``` - -2. 创建一个有prepare/close函数的自定义标量函数 - - ``` - CREATE FUNCTION my_add(INT, INT) RETURNS INT PROPERTIES ( - "symbol" = "_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_", - "prepare_fn" = "_ZN9doris_udf14AddUdf_prepareEPNS_15FunctionContextENS0_18FunctionStateScopeE", - "close_fn" = "_ZN9doris_udf12AddUdf_closeEPNS_15FunctionContextENS0_18FunctionStateScopeE", - "object_file" = "http://host:port/libmyadd.so" - ); - ``` - -3. 创建一个自定义聚合函数 - - ``` - CREATE AGGREGATE FUNCTION my_count (BIGINT) RETURNS BIGINT PROPERTIES ( - "init_fn"="_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE", - "update_fn"="_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE", - "merge_fn"="_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_", - "finalize_fn"="_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE", - "object_file"="http://host:port/libudasample.so" - ); - ``` - -4. 创建一个变长参数的标量函数 - - ``` - CREATE FUNCTION strconcat(varchar, ...) RETURNS varchar properties ( - "symbol" = "_ZN9doris_udf6StrConcatUdfEPNS_15FunctionContextERKNS_6IntValES4_", - "object_file" = "http://host:port/libmyStrConcat.so" - ); - ``` - -5. 创建一个自定义别名函数 - - ``` - -- 创建自定义功能别名函数 - CREATE ALIAS FUNCTION id_masking(BIGINT) WITH PARAMETER(id) - AS CONCAT(LEFT(id, 3), '****', RIGHT(id, 4)); - - -- 创建自定义 CAST 别名函数 - CREATE ALIAS FUNCTION string(ALL, INT) WITH PARAMETER(col, length) - AS CAST(col AS varchar(length)); - ``` -6. 
创建一个远程自动函数 - ``` - CREATE FUNCTION rpc_add(INT, INT) RETURNS INT PROPERTIES ( - "SYMBOL"="add_int", - "OBJECT_FILE"="127.0.0.1:9999", - "TYPE"="RPC" - ); - ``` -## keyword - - CREATE,FUNCTION diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/drop-function.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/drop-function.md deleted file mode 100644 index 6fb66aed51..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/drop-function.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -{ - "title": "DROP FUNCTION", - "language": "zh-CN" -} ---- - - - -# DROP FUNCTION -## description -### Syntax - -``` -DROP FUNCTION function_name - (arg_type [, ...]) -``` - -### Parameters - -> `function_name`: 要删除函数的名字 -> -> `arg_type`: 要删除函数的参数列表 -> - - -删除一个自定义函数。函数的名字、参数类型完全一致才能够被删除 - -## example - -1. 删除掉一个函数 - -``` -DROP FUNCTION my_add(INT, INT) -``` - -## keyword - - DROP,FUNCTION diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Definition/show-functions.md b/docs/zh-CN/sql-reference/sql-statements/Data Definition/show-functions.md deleted file mode 100644 index 3710109b4a..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Definition/show-functions.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -{ - "title": "SHOW FUNCTIONS", - "language": "zh-CN" -} ---- - - - -# SHOW FUNCTIONS -## description -### Syntax - -``` -SHOW [FULL] [BUILTIN] FUNCTIONS [IN|FROM db] [LIKE 'function_pattern'] -``` - -### Parameters - ->`full`:表示显示函数的详细信息 ->`builtin`:表示显示系统提供的函数 ->`db`: 要查询的数据库名字 ->`function_pattern`: 用来过滤函数名称的参数 - - -查看数据库下所有的自定义(系统提供)的函数。如果用户指定了数据库,那么查看对应数据库的,否则直接查询当前会话所在数据库 - -需要对这个数据库拥有 `SHOW` 权限 - -## example - -``` -mysql> show full functions in testDb\G -*************************** 1. row *************************** - Signature: my_add(INT,INT) - Return Type: INT - Function Type: Scalar -Intermediate Type: NULL - Properties: {"symbol":"_ZN9doris_udf6AddUdfEPNS_15FunctionContextERKNS_6IntValES4_","object_file":"http://host:port/libudfsample.so","md5":"cfe7a362d10f3aaf6c49974ee0f1f878"} -*************************** 2. row *************************** - Signature: my_count(BIGINT) - Return Type: BIGINT - Function Type: Aggregate -Intermediate Type: NULL - Properties: {"object_file":"http://host:port/libudasample.so","finalize_fn":"_ZN9doris_udf13CountFinalizeEPNS_15FunctionContextERKNS_9BigIntValE","init_fn":"_ZN9doris_udf9CountInitEPNS_15FunctionContextEPNS_9BigIntValE","merge_fn":"_ZN9doris_udf10CountMergeEPNS_15FunctionContextERKNS_9BigIntValEPS2_","md5":"37d185f80f95569e2676da3d5b5b9d2f","update_fn":"_ZN9doris_udf11CountUpdateEPNS_15FunctionContextERKNS_6IntValEPNS_9BigIntValE"} -*************************** 3. 
row *************************** - Signature: id_masking(BIGINT) - Return Type: VARCHAR - Function Type: Alias -Intermediate Type: NULL - Properties: {"parameter":"id","origin_function":"concat(left(`id`, 3), `****`, right(`id`, 4))"} - -3 rows in set (0.00 sec) -mysql> show builtin functions in testDb like 'year%'; -+---------------+ -| Function Name | -+---------------+ -| year | -| years_add | -| years_diff | -| years_sub | -+---------------+ -2 rows in set (0.00 sec) -``` - -## keyword - - SHOW,FUNCTIONS diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BEGIN.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BEGIN.md deleted file mode 100644 index a286a3f785..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BEGIN.md +++ /dev/null @@ -1,93 +0,0 @@ ---- -{ - "title": "BEGIN", - "language": "zh-CN" -} ---- - - - -# BEGIN, COMMIT, ROLLBACK -## Description -### Syntax - -``` -BEGIN; -INSERT INTO table_name ... -COMMIT; -``` - -``` -BEGIN [ WITH LABEL label]; -INSERT INTO table_name ... -ROLLBACK; -``` -### Parameters - -> label: 用于指定当前事务的标签名。 - -### Note - -事务只能对insert使用,而不能对update和delete使用,当指定标签时,可通过以下命令检查事务的运行状态: `SHOW TRANSACTION WHERE LABEL = 'label'` - -## example - -1. 开启一个事务,不指定标签,执行insert后提交。 - -``` -BEGIN -INSERT INTO test VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, DEFAULT); -INSERT INTO test (c1) VALUES (1); -COMMIT: -``` - -所有在`begin`和`commit`之间的数据会被插入到test表中。 - -2. 开启一个事务,不指定标签,执行insert后,回滚。 - -``` -BEGIN -INSERT INTO test VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, DEFAULT); -INSERT INTO test (c1) VALUES (1); -ROLLBACK: -``` - -所有在`begin`和`commit`之间的数据会取消,没有任何数据插入到test表中。 - -3. 开启一个事务,指定标签为test_label1,执行insert后提交。 - -``` -BEGIN WITH LABEL test_label1 -INSERT INTO test VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, DEFAULT); -INSERT INTO test (c1) VALUES (1); -COMMIT: -``` - -所有在`begin`和`commit`之间的数据会被插入到test表中。 -标签`test_label1`用于标记该事务,可以通过以下命令来检查事务的状态:`SHOW TRANSACTION WHERE LABEL = 'test_label1'`。 - -## keyword -BEGIN, COMMIT, ROLLBACK diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md deleted file mode 100644 index 653587110a..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md +++ /dev/null @@ -1,603 +0,0 @@ ---- -{ - "title": "BROKER LOAD", - "language": "zh-CN" -} ---- - - - -# BROKER LOAD -## description - - Broker load 通过随 Doris 集群一同部署的 broker 进行,访问对应数据源的数据,进行数据导入。 - 可以通过 show broker 命令查看已经部署的 broker。 - 目前支持以下5种数据源: - - 1. Baidu HDFS:百度内部的 hdfs,仅限于百度内部使用。 - 2. Baidu AFS:百度内部的 afs,仅限于百度内部使用。 - 3. Baidu Object Storage(BOS):百度对象存储。仅限百度内部用户、公有云用户或其他可以访问 BOS 的用户使用。 - 4. Apache HDFS:社区版本 hdfs。 - 5. Amazon S3:Amazon对象存储。 - -语法: - - LOAD LABEL load_label - ( - data_desc1[, data_desc2, ...] - ) - WITH [BROKER broker_name | S3] - [load_properties] - [opt_properties]; - - 1. load_label - - 当前导入批次的标签。在一个 database 内唯一。 - 语法: - [database_name.]your_label - - 2. data_desc - - 用于描述一批导入数据。 - 语法: - [MERGE|APPEND|DELETE] - DATA INFILE - ( - "file_path1"[, file_path2, ...] 
- ) - [NEGATIVE] - INTO TABLE `table_name` - [PARTITION (p1, p2)] - [COLUMNS TERMINATED BY "column_separator"] - [FORMAT AS "file_type"] - [(column_list)] - [SET (k1 = func(k2))] - [PRECEDING FILTER predicate] - [WHERE predicate] - [DELETE ON label=true] - [ORDER BY source_sequence] - [read_properties] - - 说明: - file_path: - - 文件路径,可以指定到一个文件,也可以用 * 通配符指定某个目录下的所有文件。通配符必须匹配到文件,而不能是目录。 - - PARTITION: - - 如果指定此参数,则只会导入指定的分区,导入分区以外的数据会被过滤掉。 - 如果不指定,默认导入table的所有分区。 - - NEGATIVE: - 如果指定此参数,则相当于导入一批“负”数据。用于抵消之前导入的同一批数据。 - 该参数仅适用于存在 value 列,并且 value 列的聚合类型仅为 SUM 的情况。 - - column_separator: - - 用于指定导入文件中的列分隔符。默认为 \t - 如果是不可见字符,则需要加\\x作为前缀,使用十六进制来表示分隔符。 - 如hive文件的分隔符\x01,指定为"\\x01" - - file_type: - - 用于指定导入文件的类型,例如:parquet、orc、csv、csv_with_names、csv_with_names_and_types。默认值通过文件后缀名判断。 - - column_list: - - 用于指定导入文件中的列和 table 中的列的对应关系。 - 当需要跳过导入文件中的某一列时,将该列指定为 table 中不存在的列名即可。 - 语法: - (col_name1, col_name2, ...) - - SET: - - 如果指定此参数,可以将源文件某一列按照函数进行转化,然后将转化后的结果导入到table中。语法为 `column_name` = expression。举几个例子帮助理解。 - 例1: 表中有3个列“c1, c2, c3", 源文件中前两列依次对应(c1,c2),后两列之和对应c3;那么需要指定 columns (c1,c2,tmp_c3,tmp_c4) SET (c3=tmp_c3+tmp_c4); - 例2: 表中有3个列“year, month, day"三个列,源文件中只有一个时间列,为”2018-06-01 01:02:03“格式。 - 那么可以指定 columns(tmp_time) set (year = year(tmp_time), month=month(tmp_time), day=day(tmp_time)) 完成导入。 - - PRECEDING FILTER predicate: - - 用于过滤原始数据。原始数据是未经列映射、转换的数据。用户可以在对转换前的数据前进行一次过滤,选取期望的数据,再进行转换。 - - WHERE: - - 对做完 transform 的数据进行过滤,符合 where 条件的数据才能被导入。WHERE 语句中只可引用表中列名。 - - merge_type: - - 数据的合并类型,一共支持三种类型APPEND、DELETE、MERGE 其中,APPEND是默认值,表示这批数据全部需要追加到现有数据中,DELETE 表示删除与这批数据key相同的所有行,MERGE 语义 需要与delete on条件联合使用,表示满足delete 条件的数据按照DELETE 语义处理其余的按照APPEND 语义处理, - - delete_on_predicates: - - 表示删除条件,仅在 merge type 为MERGE 时有意义,语法与where 相同 - - ORDER BY: - - 只适用于UNIQUE_KEYS,相同key列下,保证value列按照source_sequence进行REPLACE, source_sequence可以是数据源中的列,也可以是表结构中的一列。 - - read_properties: - - 用于指定一些特殊参数。 - 语法: - [PROPERTIES ("key"="value", ...)] - - 可以指定如下参数: - - line_delimiter: 用于指定导入文件中的换行符,默认为\n。可以使用做多个字符的组合作为换行符。 - - fuzzy_parse: 布尔类型,为true表示json将以第一行为schema 进行解析,开启这个选项可以提高json 导入效率,但是要求所有json 对象的key的顺序和第一行一致, 默认为false,仅用于json格式。 - - jsonpaths: 导入json方式分为:简单模式和匹配模式。 - 简单模式:没有设置jsonpaths参数即为简单模式,这种模式下要求json数据是对象类型,例如: - {"k1":1, "k2":2, "k3":"hello"},其中k1,k2,k3是列名字。 - 匹配模式:用于json数据相对复杂,需要通过jsonpaths参数匹配对应的value。 - - strip_outer_array: 布尔类型,为true表示json数据以数组对象开始且将数组对象中进行展平,默认值是false。例如: - [ - {"k1" : 1, "v1" : 2}, - {"k1" : 3, "v1" : 4} - ] - 当strip_outer_array为true,最后导入到doris中会生成两行数据。 - - json_root: json_root为合法的jsonpath字符串,用于指定json document的根节点,默认值为""。 - - num_as_string: 布尔类型,为true表示在解析json数据时会将数字类型转为字符串,然后在确保不会出现精度丢失的情况下进行导入。 - - 3. broker_name - - 所使用的 broker 名称,可以通过 show broker 命令查看。 - - 4. load_properties - - 用于提供通过 broker 访问数据源的信息。不同的 broker,以及不同的访问方式,需要提供的信息不同。 - - 4.1. Baidu HDFS/AFS - - 访问百度内部的 hdfs/afs 目前仅支持简单认证,需提供: - username:hdfs 用户名 - password:hdfs 密码 - - 4.2. BOS - - 需提供: - bos_endpoint:BOS 的endpoint - bos_accesskey:公有云用户的 accesskey - bos_secret_accesskey:公有云用户的 secret_accesskey - - 4.3. 
Apache HDFS - - 社区版本的 hdfs,支持简单认证、kerberos 认证。以及支持 HA 配置。 - 简单认证: - hadoop.security.authentication = simple (默认) - username:hdfs 用户名 - password:hdfs 密码 - - kerberos 认证: - hadoop.security.authentication = kerberos - kerberos_principal:指定 kerberos 的 principal - kerberos_keytab:指定 kerberos 的 keytab 文件路径。该文件必须为 broker 进程所在服务器上的文件。 - kerberos_keytab_content:指定 kerberos 中 keytab 文件内容经过 base64 编码之后的内容。这个跟 kerberos_keytab 配置二选一就可以。 - - namenode HA: - 通过配置 namenode HA,可以在 namenode 切换时,自动识别到新的 namenode - dfs.nameservices: 指定 hdfs 服务的名字,自定义,如:"dfs.nameservices" = "my_ha" - dfs.ha.namenodes.xxx:自定义 namenode 的名字,多个名字以逗号分隔。其中 xxx 为 dfs.nameservices 中自定义的名字,如 "dfs.ha.namenodes.my_ha" = "my_nn" - dfs.namenode.rpc-address.xxx.nn:指定 namenode 的rpc地址信息。其中 nn 表示 dfs.ha.namenodes.xxx 中配置的 namenode 的名字,如:"dfs.namenode.rpc-address.my_ha.my_nn" = "host:port" - dfs.client.failover.proxy.provider:指定 client 连接 namenode 的 provider,默认为:org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider - - 4.4. Amazon S3 - - 需提供: - fs.s3a.access.key:AmazonS3的access key - fs.s3a.secret.key:AmazonS3的secret key - fs.s3a.endpoint:AmazonS3的endpoint - 4.5. 如果使用S3协议直接连接远程存储时需要指定如下属性 - - ( - "AWS_ENDPOINT" = "", - "AWS_ACCESS_KEY" = "", - "AWS_SECRET_KEY"="", - "AWS_REGION" = "" - ) - 4.6. 如果使用HDFS协议直接连接远程存储时需要指定如下属性 - ( - "fs.defaultFS" = "", - "hdfs_user"="", - "dfs.nameservices"="my_ha", - "dfs.ha.namenodes.xxx"="my_nn1,my_nn2", - "dfs.namenode.rpc-address.xxx.my_nn1"="host1:port", - "dfs.namenode.rpc-address.xxx.my_nn2"="host2:port", - "dfs.client.failover.proxy.provider.xxx"="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ) - fs.defaultFS: hdfs集群defaultFS - hdfs_user: 连接hdfs集群时使用的用户名 - namenode HA: - 通过配置 namenode HA,可以在 namenode 切换时,自动识别到新的 namenode - dfs.nameservices: 指定 hdfs 服务的名字,自定义,如:"dfs.nameservices" = "my_ha" - dfs.ha.namenodes.xxx:自定义 namenode 的名字,多个名字以逗号分隔。其中 xxx 为 dfs.nameservices 中自定义的名字,如 "dfs.ha.namenodes.my_ha" = "my_nn" - dfs.namenode.rpc-address.xxx.nn:指定 namenode 的rpc地址信息。其中 nn 表示 dfs.ha.namenodes.xxx 中配置的 namenode 的名字,如:"dfs.namenode.rpc-address.my_ha.my_nn" = "host:port" - dfs.client.failover.proxy.provider:指定 client 连接 namenode 的 provider,默认为:org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider - - 5. opt_properties - - 用于指定一些特殊参数。 - 语法: - [PROPERTIES ("key"="value", ...)] - - 可以指定如下参数: - timeout: 指定导入操作的超时时间。默认超时为4小时。单位秒。 - max_filter_ratio:最大容忍可过滤(数据不规范等原因)的数据比例。默认零容忍。 - exec_mem_limit: 导入内存限制。默认为 2GB。单位为字节。 - strict mode: 是否对数据进行严格限制。默认为 false。 - timezone: 指定某些受时区影响的函数的时区,如 strftime/alignment_timestamp/from_unixtime 等等,具体请查阅 [时区] 文档。如果不指定,则使用 "Asia/Shanghai" 时区。 - send_batch_parallelism: 用于设置发送批处理数据的并行度,如果并行度的值超过 BE 配置中的 `max_send_batch_parallelism_per_job`,那么作为协调点的 BE 将使用 `max_send_batch_parallelism_per_job` 的值。 - load_to_single_tablet: 布尔类型,为true表示支持一个任务只导入数据到对应分区的一个tablet,默认值为false,作业的任务数取决于整体并发度。该参数只允许在对带有random分区的olap表导数的时候设置。 - - 6. 导入数据格式样例 - - 整型类(TINYINT/SMALLINT/INT/BIGINT/LARGEINT):1, 1000, 1234 - 浮点类(FLOAT/DOUBLE/DECIMAL):1.1, 0.23, .356 - 日期类(DATE/DATETIME):2017-10-03, 2017-06-13 12:34:03。 - (注:如果是其他日期格式,可以在导入命令中,使用 strftime 或者 time_format 函数进行转换) - 字符串类(CHAR/VARCHAR):"I am a student", "a" - NULL值:\N - -## example - - 1. 
从 HDFS 导入一批数据,指定超时时间和过滤比例。使用明文 my_hdfs_broker 的 broker。简单认证。 - - LOAD LABEL example_db.label1 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - ) - WITH BROKER my_hdfs_broker - ( - "username" = "hdfs_user", - "password" = "hdfs_passwd" - ) - PROPERTIES - ( - "timeout" = "3600", - "max_filter_ratio" = "0.1" - ); - - 其中 hdfs_host 为 namenode 的 host,hdfs_port 为 fs.defaultFS 端口(默认9000) - - 2. 从 AFS 一批数据,包含多个文件。导入不同的 table,指定分隔符,指定列对应关系。 - - LOAD LABEL example_db.label2 - ( - DATA INFILE("afs://afs_host:hdfs_port/user/palo/data/input/file1") - INTO TABLE `my_table_1` - COLUMNS TERMINATED BY "," - (k1, k3, k2, v1, v2), - DATA INFILE("afs://afs_host:hdfs_port/user/palo/data/input/file2") - INTO TABLE `my_table_2` - COLUMNS TERMINATED BY "\t" - (k1, k2, k3, v2, v1) - ) - WITH BROKER my_afs_broker - ( - "username" = "afs_user", - "password" = "afs_passwd" - ) - PROPERTIES - ( - "timeout" = "3600", - "max_filter_ratio" = "0.1" - ); - - - 3. 从 HDFS 导入一批数据,指定hive的默认分隔符\x01,并使用通配符*指定目录下的所有文件。 - 使用简单认证,同时配置 namenode HA - - LOAD LABEL example_db.label3 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*") - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\\x01" - ) - WITH BROKER my_hdfs_broker - ( - "username" = "hdfs_user", - "password" = "hdfs_passwd", - "dfs.nameservices" = "my_ha", - "dfs.ha.namenodes.my_ha" = "my_namenode1, my_namenode2", - "dfs.namenode.rpc-address.my_ha.my_namenode1" = "nn1_host:rpc_port", - "dfs.namenode.rpc-address.my_ha.my_namenode2" = "nn2_host:rpc_port", - "dfs.client.failover.proxy.provider" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ) - - 4. 从 HDFS 导入一批“负”数据。同时使用 kerberos 认证方式。提供 keytab 文件路径。 - - LOAD LABEL example_db.label4 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file") - NEGATIVE - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\t" - ) - WITH BROKER my_hdfs_broker - ( - "hadoop.security.authentication" = "kerberos", - "kerberos_principal"="doris@YOUR.COM", - "kerberos_keytab"="/home/palo/palo.keytab" - ) - - 5. 从 HDFS 导入一批数据,指定分区。同时使用 kerberos 认证方式。提供 base64 编码后的 keytab 文件内容。 - - LOAD LABEL example_db.label5 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, k3, k2, v1, v2) - ) - WITH BROKER my_hdfs_broker - ( - "hadoop.security.authentication"="kerberos", - "kerberos_principal"="doris@YOUR.COM", - "kerberos_keytab_content"="BQIAAABEAAEACUJBSURVLkNPTQAEcGFsbw" - ) - - 6. 从 BOS 导入一批数据,指定分区, 并对导入文件的列做一些转化,如下: - 表结构为: - k1 varchar(20) - k2 int - - 假设数据文件只有一行数据: - - Adele,1,1 - - 数据文件中各列,对应导入语句中指定的各列: - k1,tmp_k2,tmp_k3 - - 转换如下: - - 1) k1: 不变换 - 2) k2:是 tmp_k2 和 tmp_k3 数据之和 - - LOAD LABEL example_db.label6 - ( - DATA INFILE("bos://my_bucket/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, tmp_k2, tmp_k3) - SET ( - k2 = tmp_k2 + tmp_k3 - ) - ) - WITH BROKER my_bos_broker - ( - "bos_endpoint" = "http://bj.bcebos.com", - "bos_accesskey" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "bos_secret_accesskey"="yyyyyyyyyyyyyyyyyyyy" - ) - - 7. 
导入数据到含有HLL列的表,可以是表中的列或者数据里面的列 - - 如果表中有4列分别是(id,v1,v2,v3)。其中v1和v2列是hll列。导入的源文件有3列, 其中表中的第一列 = 源文件中的第一列,而表中的第二,三列为源文件中的第二,三列变换得到,表中的第四列在源文件中并不存在。 - 则(column_list)中声明第一列为id,第二三列为一个临时命名的k1,k2。 - 在SET中必须给表中的hll列特殊声明 hll_hash。表中的v1列等于原始数据中的hll_hash(k1)列, 表中的v3列在原始数据中并没有对应的值,使用empty_hll补充默认值。 - LOAD LABEL example_db.label7 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (id, k1, k2) - SET ( - v1 = hll_hash(k1), - v2 = hll_hash(k2), - v3 = empty_hll() - ) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - LOAD LABEL example_db.label8 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, k2, tmp_k3, tmp_k4, v1, v2) - SET ( - v1 = hll_hash(tmp_k3), - v2 = hll_hash(tmp_k4) - ) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - 8. 导入Parquet文件中数据 指定FORMAT 为parquet, 默认是通过文件后缀判断 - - LOAD LABEL example_db.label9 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - FORMAT AS "parquet" - (k1, k2, k3) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - 9. 提取文件路径中的分区字段 - - 如果需要,则会根据表中定义的字段类型解析文件路径中的分区字段(partitioned fields),类似Spark中Partition Discovery的功能 - - LOAD LABEL example_db.label10 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/*/*") - INTO TABLE `my_table` - FORMAT AS "csv" - (k1, k2, k3) - COLUMNS FROM PATH AS (city, utc_date) - SET (uniq_id = md5sum(k1, city)) - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing目录下包括如下文件: - - [hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/utc_date=2019-06-26/0000.csv, hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/utc_date=2019-06-26/0001.csv, ...] - - 则提取文件路径的中的city和utc_date字段 - - 10. 对待导入数据进行过滤,k1 值大于 k2 值的列才能被导入 - - LOAD LABEL example_db.label10 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - where k1 > k2 - ) - - 11. 从 AmazonS3 导入Parquet文件中数据,指定 FORMAT 为parquet,默认是通过文件后缀判断: - - LOAD LABEL example_db.label11 - ( - DATA INFILE("s3a://my_bucket/input/file") - INTO TABLE `my_table` - FORMAT AS "parquet" - (k1, k2, k3) - ) - WITH BROKER my_s3a_broker - ( - "fs.s3a.access.key" = "xxxxxxxxxxxxxxxxxxxxxxxxxx", - "fs.s3a.secret.key" = "yyyyyyyyyyyyyyyyyyyy", - "fs.s3a.endpoint" = "s3.amazonaws.com" - ) - - 12. 提取文件路径中的时间分区字段,并且时间包含 %3A (在 hdfs 路径中,不允许有 ':',所有 ':' 会由 %3A 替换) - - 假设有如下文件: - - /user/data/data_time=2020-02-17 00%3A00%3A00/test.txt - /user/data/data_time=2020-02-18 00%3A00%3A00/test.txt - - 表结构为: - data_time DATETIME, - k2 INT, - k3 INT - - LOAD LABEL example_db.label12 - ( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl12` - COLUMNS TERMINATED BY "," - (k2,k3) - COLUMNS FROM PATH AS (data_time) - SET (data_time=str_to_date(data_time, '%Y-%m-%d %H%%3A%i%%3A%s')) - ) - WITH BROKER "hdfs" ("username"="user", "password"="pass"); - - 13. 
从 HDFS 导入一批数据,指定超时时间和过滤比例。使用明文 my_hdfs_broker 的 broker。简单认证。并且将原有数据中与 导入数据中v2 大于100 的列相匹配的列删除,其他列正常导入 - - LOAD LABEL example_db.label1 - ( - MERGE DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\t" - (k1, k2, k3, v2, v1) - ) - DELETE ON v2 >100 - WITH BROKER my_hdfs_broker - ( - "username" = "hdfs_user", - "password" = "hdfs_passwd" - ) - PROPERTIES - ( - "timeout" = "3600", - "max_filter_ratio" = "0.1" - ); - - 14. 导入时指定source_sequence列,保证UNIQUE_KEYS表中的替换顺序: - LOAD LABEL example_db.label_sequence - ( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl1` - COLUMNS TERMINATED BY "," - (k1,k2,source_sequence,v1,v2) - ORDER BY source_sequence - ) - with BROKER "hdfs" ("username"="user", "password"="pass"); - - 15. 先过滤原始数据,在进行列的映射、转换和过滤操作 - - LOAD LABEL example_db.label_filter - ( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl1` - COLUMNS TERMINATED BY "," - (k1,k2,v1,v2) - SET (k1 = k1 +1) - PRECEDING FILTER k1 > 2 - WHERE k1 > 3 - ) - with BROKER "hdfs" ("username"="user", "password"="pass"); - - 16. 导入json文件中数据 指定FORMAT为json, 默认是通过文件后缀判断,设置读取数据的参数 - - LOAD LABEL example_db.label9 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - FORMAT AS "json" - (k1, k2, k3) - properties("fuzzy_parse"="true", "strip_outer_array"="true") - ) - WITH BROKER hdfs ("username"="hdfs_user", "password"="hdfs_password"); - - 17. LOAD WITH HDFS, 普通HDFS集群 - LOAD LABEL example_db.label_filter - ( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl1` - COLUMNS TERMINATED BY "," - (k1,k2,v1,v2) - ) - with HDFS ( - "fs.defaultFS"="hdfs://testFs", - "hdfs_user"="user" - ); - - 18. LOAD WITH HDFS, 带ha的HDFS集群 - LOAD LABEL example_db.label_filter - ( - DATA INFILE("hdfs://host:port/user/data/*/test.txt") - INTO TABLE `tbl1` - COLUMNS TERMINATED BY "," - (k1,k2,v1,v2) - ) - with HDFS ( - "fs.defaultFS"="hdfs://testFs", - "hdfs_user"="user", - "dfs.nameservices"="my_ha", - "dfs.ha.namenodes.xxx"="my_nn1,my_nn2", - "dfs.namenode.rpc-address.xxx.my_nn1"="host1:port", - "dfs.namenode.rpc-address.xxx.my_nn2"="host2:port", - "dfs.client.failover.proxy.provider.xxx"="org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider" - ); - -## keyword - - BROKER,LOAD diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md deleted file mode 100644 index 0ab78885e2..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CANCEL LOAD.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -{ - "title": "CANCEL LOAD", - "language": "zh-CN" -} ---- - - - -# CANCEL LOAD -## description - - 该语句用于撤销指定 load label 的批次的导入作业。 - 这是一个异步操作,任务提交成功则返回。执行后可使用 SHOW LOAD 命令查看进度。 - 语法: - CANCEL LOAD - [FROM db_name] - WHERE LABEL = "load_label"; - -## example - - 1. 
撤销数据库 example_db 上, label 为 example_db_test_load_label 的导入作业 - CANCEL LOAD - FROM example_db - WHERE LABEL = "example_db_test_load_label"; - -## keyword - CANCEL,LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md deleted file mode 100644 index 67051735cb..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/CREATE SYNC JOB.md +++ /dev/null @@ -1,166 +0,0 @@ ---- -{ - "title": "CREATE SYNC JOB", - "language": "zh-CN" -} ---- - - - -# CREATE SYNC JOB - -## description - -数据同步(Sync Job)功能,支持用户提交一个常驻的数据同步作业,通过从指定的远端地址读取Binlog日志,增量同步用户在Mysql数据库的对数据更新操作的CDC(Change Data Capture)功能。 - -目前数据同步作业只支持对接Canal,从Canal Server上获取解析好的Binlog数据,导入到Doris内。 - -用户可通过 `SHOW SYNC JOB` 查看数据同步作业状态。 - -语法: - -``` -CREATE SYNC [db.]job_name - ( - channel_desc, - channel_desc - ... - ) -binlog_desc -``` - -1. `job_name` - - 同步作业名称,是作业在当前数据库内的唯一标识,相同`job_name`的作业只能有一个在运行。 - -2. `channel_desc` - - 作业下的数据通道,用来描述mysql源表到doris目标表的映射关系。 - - 语法: - - ``` - FROM mysql_db.src_tbl INTO des_tbl - [partitions] - [columns_mapping] - ``` - - 1. `mysql_db.src_tbl` - - 指定mysql端的数据库和源表。 - - 2. `des_tbl` - - 指定doris端的目标表,只支持Unique表,且需开启表的batch delete功能(开启方法请看help alter table的'批量删除功能')。 - - 3. `partitions` - - 指定导入目的表的哪些 partition 中。如果不指定,则会自动导入到对应的 partition 中。 - - 示例: - - ``` - PARTITION(p1, p2, p3) - ``` - - 4. `column_mapping` - - 指定mysql源表和doris目标表的列之间的映射关系。如果不指定,FE会默认源表和目标表的列按顺序一一对应。 - - 不支持 col_name = expr 的形式表示列。 - - 示例: - - ``` - 假设目标表列为(k1, k2, v1), - - 改变列k1和k2的顺序 - COLUMNS(k2, k1, v1) - - 忽略源数据的第四列 - COLUMNS(k2, k1, v1, dummy_column) - ``` - -3. `binlog_desc` - - 用来描述远端数据源,目前仅支持canal一种。 - - 语法: - - ``` - FROM BINLOG - ( - "key1" = "value1", - "key2" = "value2" - ) - ``` - - 1. Canal 数据源对应的属性,以`canal.`为前缀 - - 1. canal.server.ip: canal server的地址 - 2. canal.server.port: canal server的端口 - 3. canal.destination: instance的标识 - 4. canal.batchSize: 获取的batch大小的最大值,默认8192 - 5. canal.username: instance的用户名 - 6. canal.password: instance的密码 - 7. canal.debug: 可选,设置为true时,会将batch和每一行数据的详细信息都打印出来 - -## example - -1. 简单为 `test_db` 的 `test_tbl` 创建一个名为 `job1` 的数据同步作业,连接本地的Canal服务器,对应Mysql源表 `mysql_db1.tbl1`。 - - CREATE SYNC `test_db`.`job1` - ( - FROM `mysql_db1`.`tbl1` INTO `test_tbl ` - ) - FROM BINLOG - ( - "type" = "canal", - "canal.server.ip" = "127.0.0.1", - "canal.server.port" = "11111", - "canal.destination" = "example", - "canal.username" = "", - "canal.password" = "" - ); - -2. 
为 `test_db` 的多张表创建一个名为 `job1` 的数据同步作业,一一对应多张Mysql源表,并显式的指定列映射。 - - CREATE SYNC `test_db`.`job1` - ( - FROM `mysql_db`.`t1` INTO `test1` COLUMNS(k1, k2, v1) PARTITIONS (p1, p2), - FROM `mysql_db`.`t2` INTO `test2` COLUMNS(k3, k4, v2) PARTITION p1 - ) - FROM BINLOG - ( - "type" = "canal", - "canal.server.ip" = "xx.xxx.xxx.xx", - "canal.server.port" = "12111", - "canal.destination" = "example", - "canal.username" = "username", - "canal.password" = "password" - ); - -## keyword - - CREATE,SYNC,JOB,BINLOG - - - - \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/DELETE.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/DELETE.md deleted file mode 100644 index 4f7de606ec..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/DELETE.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -{ - "title": "DELETE", - "language": "zh-CN" -} ---- - - - -# DELETE -## description - - 该语句用于按条件删除指定 table(base index) partition 中的数据。 - 该操作会同时删除和此 base index 相关的 rollup index 的数据。 - 语法: - DELETE FROM table_name [PARTITION partition_name | PARTITIONS (p1, p2)] - WHERE - column_name1 op { value | value_list } [ AND column_name2 op { value | value_list } ...]; - - 说明: - 1) op 的可选类型包括:=, >, <, >=, <=, !=, in, not in - 2) 只能指定 key 列上的条件。 - 2) 当选定的 key 列不存在于某个 rollup 中时,无法进行 delete。 - 3) 条件之间只能是“与”的关系。 - 若希望达成“或”的关系,需要将条件分写在两个 DELETE 语句中。 - 4) 如果为分区表,可以指定分区,如不指定,且会话变量 delete_without_partition 为 true,则会应用到所有分区。如果是单分区表,可以不指定。 - - 注意: - 该语句可能会降低执行后一段时间内的查询效率。 - 影响程度取决于语句中指定的删除条件的数量。 - 指定的条件越多,影响越大。 - -## example - - 1. 删除 my_table partition p1 中 k1 列值为 3 的数据行 - DELETE FROM my_table PARTITION p1 - WHERE k1 = 3; - - 2. 删除 my_table partition p1 中 k1 列值大于等于 3 且 k2 列值为 "abc" 的数据行 - DELETE FROM my_table PARTITION p1 - WHERE k1 >= 3 AND k2 = "abc"; - - 3. 删除 my_table partition p1, p2 中 k1 列值大于等于 3 且 k2 列值为 "abc" 的数据行 - DELETE FROM my_table PARTITIONS (p1, p2) - WHERE k1 >= 3 AND k2 = "abc"; - -## keyword - DELETE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/EXPORT.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/EXPORT.md deleted file mode 100644 index e545c07140..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/EXPORT.md +++ /dev/null @@ -1,126 +0,0 @@ ---- -{ - "title": "EXPORT", - "language": "zh-CN" -} ---- - - - -# EXPORT -## description - - 该语句用于将指定表的数据导出到指定位置。 - 该功能通过 broker 进程实现。对于不同的目的存储系统,需要部署不同的 broker。可以通过 SHOW BROKER 查看已部署的 broker。 - 这是一个异步操作,任务提交成功则返回。执行后可使用 SHOW EXPORT 命令查看进度。 - - 语法: - EXPORT TABLE table_name - [PARTITION (p1[,p2])] - [WHERE [expr]] - TO export_path - [opt_properties] - [broker|S3]; - - 1. table_name - 当前要导出的表的表名,目前支持engine为olap和mysql的表的导出。 - - 2. partition - 可以只导出指定表的某些指定分区 - - 3. expr - 导出满足 where 条件的行,选填。不填则默认导出所有行。 - - 4. export_path - 导出的路径,需为目录。支持导出到本地,hdfs,百度bos,s3协议的其他存储系统。 - - 5. opt_properties - 用于指定一些特殊参数。 - 语法: - [PROPERTIES ("key"="value", ...)] - - 可以指定如下参数: - label: 指定一个自定义作业标识。后续可以使用这个标识查看作业状态。 - column_separator: 指定导出的列分隔符,默认为\t。支持不可见字符,比如 '\x07'。 - column: 指定待导出的列,使用英文逗号隔开,如果不填这个参数默认是导出表的所有列。 - line_delimiter: 指定导出的行分隔符,默认为\n。支持不可见字符,比如 '\x07'。 - exec_mem_limit: 导出在单个 BE 节点的内存使用上限,默认为 2GB,单位为字节。 - timeout:导入作业的超时时间,默认为1天,单位是秒。 - tablet_num_per_task:每个子任务能分配的最大 Tablet 数量。 - - 6. broker|s3 - 指定使用broker导出或者通过S3协议导出 - 语法: - WITH [BROKER broker_name | S3] ("key"="value"[,...]) - 这里需要指定具体的broker name, 以及所需的broker属性, 如果使用S3协议则无需指定broker name - - 对于不同存储系统对应的 broker,这里需要输入的参数不同。具体参数可以参阅:`help broker load` 中 broker 所需属性。 - 导出到本地时,不需要填写这部分。 - - 7. 
hdfs - 指定导出到hdfs - 语法: - WITH HDFS ("key"="value"[,...]) - - 可以指定如下参数: - fs.defaultFS: 指定HDFS的fs,格式为:hdfs://ip:port - hdfs_user:指定写入HDFS的user - -## example - - 1. 将 testTbl 表中的所有数据导出到 hdfs 上 - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - - 2. 将 testTbl 表中的分区p1,p2导出到 hdfs 上 - EXPORT TABLE testTbl PARTITION (p1,p2) TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - - 3. 将 testTbl 表中的所有数据导出到 hdfs 上,以","作为列分隔符,并指定label - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("label" = "mylabel", "column_separator"=",") WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - - 4. 将 testTbl 表中 k1 = 1 的行导出到 hdfs 上。 - EXPORT TABLE testTbl WHERE k1=1 TO "hdfs://hdfs_host:port/a/b/c" WITH BROKER "broker_name" ("username"="xxx", "password"="yyy"); - - 5. 将 testTbl 表中的所有数据导出到本地。 - EXPORT TABLE testTbl TO "file:///home/data/a"; - - 6. 将 testTbl 表中的所有数据导出到 hdfs 上,以不可见字符 "\x07" 作为列或者行分隔符。 - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("column_separator"="\\x07", "line_delimiter" = "\\x07") WITH BROKER "broker_name" ("username"="xxx", "password"="yyy") - - 7. 将 testTbl 表的 k1, v1 列导出到本地。 - EXPORT TABLE testTbl TO "file:///home/data/a" PROPERTIES ("columns" = "k1,v1"); - - 8. 将 testTbl 表中的所有数据导出到 hdfs 上,以不可见字符 "\x07" 作为列或者行分隔符。 - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("column_separator"="\\x07", "line_delimiter" = "\\x07") WITH HDFS ("fs.defaultFS"="hdfs://hdfs_host:port", "hdfs_user"="yyy") - - 9. 将 testTbl 表中的所有数据导出到本地, 第一行表示字段名称 - EXPORT TABLE testTbl TO "file:///home/data/a" PROPERTIES ("label" = "mylabel", "format"="csv_with_names"); - - 10. 将 testTbl 表中的所有数据导出到本地, 前两行表示字段名称和类型 - EXPORT TABLE testTbl TO "file:///home/data/a" PROPERTIES ("label" = "mylabel", "format"="csv_with_names_and_types"); - - 11. 将 testTbl 表中的所有数据导出到 hdfs, 第一行表示字段名称 - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("label" = "mylabel", "format"="csv_with_names") WITH BROKER "broker_name" ("username"="myname", "password"="mypassword"); - - 12. 将 testTbl 表中的所有数据导出到 hdfs, 前两行表示字段名称和类型 - EXPORT TABLE testTbl TO "hdfs://hdfs_host:port/a/b/c" PROPERTIES ("label" = "mylabel", "format"="csv_with_names_and_types") WITH BROKER "broker_name" ("username"="myname", "password"="mypassword"); - -## keyword - EXPORT - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/GROUP BY.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/GROUP BY.md deleted file mode 100644 index 30a96f1e1b..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/GROUP BY.md +++ /dev/null @@ -1,170 +0,0 @@ ---- -{ - "title": "GROUP BY", - "language": "zh-CN" -} ---- - - - -# GROUP BY - -## description - - GROUP BY `GROUPING SETS` | `CUBE` | `ROLLUP` 是对 GROUP BY 子句的扩展,它能够在一个 GROUP BY 子句中实现多个集合的分组的聚合。其结果等价于将多个相应 GROUP BY 子句进行 UNION 操作。 - - GROUP BY 子句是只含有一个元素的 GROUP BY GROUPING SETS 的特例。 - 例如,GROUPING SETS 语句: - - ``` - SELECT a, b, SUM( c ) FROM tab1 GROUP BY GROUPING SETS ( (a, b), (a), (b), ( ) ); - ``` - - 其查询结果等价于: - - ``` - SELECT a, b, SUM( c ) FROM tab1 GROUP BY a, b - UNION - SELECT a, null, SUM( c ) FROM tab1 GROUP BY a - UNION - SELECT null, b, SUM( c ) FROM tab1 GROUP BY b - UNION - SELECT null, null, SUM( c ) FROM tab1 - ``` - - `GROUPING(expr)` 指示一个列是否为聚合列,如果是聚合列为0,否则为1 - - `GROUPING_ID(expr [ , expr [ , ... ] ])` 与GROUPING 类似, GROUPING_ID根据指定的column 顺序,计算出一个列列表的 bitmap 值,每一位为GROUPING的值. 
GROUPING_ID()函数返回位向量的十进制值。 - -### Syntax - - ``` - SELECT ... - FROM ... - [ ... ] - GROUP BY [ - , ... | - GROUPING SETS [, ...] ( groupSet [ , groupSet [ , ... ] ] ) | - ROLLUP(expr [ , expr [ , ... ] ]) | - expr [ , expr [ , ... ] ] WITH ROLLUP | - CUBE(expr [ , expr [ , ... ] ]) | - expr [ , expr [ , ... ] ] WITH CUBE - ] - [ ... ] - ``` - -### Parameters - - `groupSet` 表示 select list 中的列,别名或者表达式组成的集合 `groupSet ::= { ( expr [ , expr [ , ... ] ] )}` - - `expr` 表示 select list 中的列,别名或者表达式 - -### Note - - doris 支持类似PostgreSQL 语法, 语法实例如下 - - ``` - SELECT a, b, SUM( c ) FROM tab1 GROUP BY GROUPING SETS ( (a, b), (a), (b), ( ) ); - SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY ROLLUP(a,b,c) - SELECT a, b,c, SUM( d ) FROM tab1 GROUP BY CUBE(a,b,c) - ``` - - `ROLLUP(a,b,c)` 等价于如下`GROUPING SETS` 语句 - - ``` - GROUPING SETS ( - (a,b,c), - ( a, b ), - ( a), - ( ) - ) - ``` - - `CUBE ( a, b, c )` 等价于如下`GROUPING SETS` 语句 - - ``` - GROUPING SETS ( - ( a, b, c ), - ( a, b ), - ( a, c ), - ( a ), - ( b, c ), - ( b ), - ( c ), - ( ) - ) - ``` - -## example - - 下面是一个实际数据的例子 - - ``` - > SELECT * FROM t; - +------+------+------+ - | k1 | k2 | k3 | - +------+------+------+ - | a | A | 1 | - | a | A | 2 | - | a | B | 1 | - | a | B | 3 | - | b | A | 1 | - | b | A | 4 | - | b | B | 1 | - | b | B | 5 | - +------+------+------+ - 8 rows in set (0.01 sec) - - > SELECT k1, k2, SUM(k3) FROM t GROUP BY GROUPING SETS ( (k1, k2), (k2), (k1), ( ) ); - +------+------+-----------+ - | k1 | k2 | sum(`k3`) | - +------+------+-----------+ - | b | B | 6 | - | a | B | 4 | - | a | A | 3 | - | b | A | 5 | - | NULL | B | 10 | - | NULL | A | 8 | - | a | NULL | 7 | - | b | NULL | 11 | - | NULL | NULL | 18 | - +------+------+-----------+ - 9 rows in set (0.06 sec) - - > SELECT k1, k2, GROUPING_ID(k1,k2), SUM(k3) FROM t GROUP BY GROUPING SETS ((k1, k2), (k1), (k2), ()); - +------+------+---------------+----------------+ - | k1 | k2 | grouping_id(k1,k2) | sum(`k3`) | - +------+------+---------------+----------------+ - | a | A | 0 | 3 | - | a | B | 0 | 4 | - | a | NULL | 1 | 7 | - | b | A | 0 | 5 | - | b | B | 0 | 6 | - | b | NULL | 1 | 11 | - | NULL | A | 2 | 8 | - | NULL | B | 2 | 10 | - | NULL | NULL | 3 | 18 | - +------+------+---------------+----------------+ - 9 rows in set (0.02 sec) - ``` - -## keyword - - GROUP, GROUPING, GROUPING_ID, GROUPING_SETS, GROUPING SETS, CUBE, ROLLUP diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/LOAD.md deleted file mode 100644 index 0278ce7148..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/LOAD.md +++ /dev/null @@ -1,298 +0,0 @@ ---- -{ - "title": "LOAD", - "language": "zh-CN" -} ---- - - - -# LOAD -## description - - Palo 目前支持以下4种导入方式: - - 1. Hadoop Load:基于 MR 进行 ETL 的导入。 - 2. Broker Load:使用 broker 进行进行数据导入。 - 3. Mini Load:通过 http 协议上传文件进行批量数据导入。 - 4. Stream Load:通过 http 协议进行流式数据导入。 - 5. S3 Load: 通过S3协议直接访问支持S3协议的存储系统进行数据导入, 导入的语法与Broker Load 基本相同。 - - 本帮助主要描述第一种导入方式,即 Hadoop Load 相关帮助信息。其余导入方式可以使用以下命令查看帮助: - - !!!该导入方式可能在后续某个版本即不再支持,建议使用其他导入方式进行数据导入。!!! - - 1. help broker load; - 2. help mini load; - 3. help stream load; - - Hadoop Load 仅适用于百度内部环境。公有云、私有云以及开源环境无法使用这种导入方式。 - 该导入方式必须设置用于 ETL 的 Hadoop 计算队列,设置方式可以通过 help set property 命令查看帮助。 - -语法: - - LOAD LABEL load_label - ( - data_desc1[, data_desc2, ...] - ) - [opt_properties]; - - 1. load_label - - 当前导入批次的标签。在一个 database 内唯一。 - 语法: - [database_name.]your_label - - 2. 
data_desc - - 用于描述一批导入数据。 - 语法: - DATA INFILE - ( - "file_path1"[, file_path2, ...] - ) - [NEGATIVE] - INTO TABLE `table_name` - [PARTITION (p1, p2)] - [COLUMNS TERMINATED BY "column_separator"] - [FORMAT AS "file_type"] - [(column_list)] - [COLUMNS FROM PATH AS (columns_from_path)] - [SET (k1 = func(k2))] - - 说明: - file_path: - - 文件路径,可以指定到一个文件,也可以用 * 通配符指定某个目录下的所有文件。通配符必须匹配到文件,而不能是目录。 - - PARTITION: - - 如果指定此参数,则只会导入指定的分区,导入分区以外的数据会被过滤掉。 - 如果不指定,默认导入table的所有分区。 - - NEGATIVE: - 如果指定此参数,则相当于导入一批“负”数据。用于抵消之前导入的同一批数据。 - 该参数仅适用于存在 value 列,并且 value 列的聚合类型仅为 SUM 的情况。 - - column_separator: - - 用于指定导入文件中的列分隔符。默认为 \t - 如果是不可见字符,则需要加\\x作为前缀,使用十六进制来表示分隔符。 - 如hive文件的分隔符\x01,指定为"\\x01" - - file_type: - - 用于指定导入文件的类型,例如:parquet、orc、csv。默认值通过文件后缀名判断。 - - column_list: - - 用于指定导入文件中的列和 table 中的列的对应关系。 - 当需要跳过导入文件中的某一列时,将该列指定为 table 中不存在的列名即可。 - 语法: - (col_name1, col_name2, ...) - - columns_from_path: - - 用于指定需要从文件路径中解析的字段。 - 语法: - (col_from_path_name1, col_from_path_name2, ...) - - SET: - - 如果指定此参数,可以将源文件某一列按照函数进行转化,然后将转化后的结果导入到table中。 - 目前支持的函数有: - - strftime(fmt, column) 日期转换函数 - fmt: 日期格式,形如%Y%m%d%H%M%S (年月日时分秒) - column: column_list中的列,即输入文件中的列。存储内容应为数字型的时间戳。 - 如果没有column_list,则按照palo表的列顺序默认输入文件的列。 - - time_format(output_fmt, input_fmt, column) 日期格式转化 - output_fmt: 转化后的日期格式,形如%Y%m%d%H%M%S (年月日时分秒) - input_fmt: 转化前column列的日期格式,形如%Y%m%d%H%M%S (年月日时分秒) - column: column_list中的列,即输入文件中的列。存储内容应为input_fmt格式的日期字符串。 - 如果没有column_list,则按照palo表的列顺序默认输入文件的列。 - - alignment_timestamp(precision, column) 将时间戳对齐到指定精度 - precision: year|month|day|hour - column: column_list中的列,即输入文件中的列。存储内容应为数字型的时间戳。 - 如果没有column_list,则按照palo表的列顺序默认输入文件的列。 - 注意:对齐精度为year、month的时候,只支持20050101~20191231范围内的时间戳。 - - default_value(value) 设置某一列导入的默认值 - 不指定则使用建表时列的默认值 - - md5sum(column1, column2, ...) 将指定的导入列的值求md5sum,返回32位16进制字符串 - - replace_value(old_value[, new_value]) 将导入文件中指定的old_value替换为new_value - new_value如不指定则使用建表时列的默认值 - - hll_hash(column) 用于将表或数据里面的某一列转化成HLL列的数据结构 - - 3. opt_properties - - 用于指定一些特殊参数。 - 语法: - [PROPERTIES ("key"="value", ...)] - - 可以指定如下参数: - cluster: 导入所使用的 Hadoop 计算队列。 - timeout: 指定导入操作的超时时间。默认超时为3天。单位秒。 - max_filter_ratio:最大容忍可过滤(数据不规范等原因)的数据比例。默认零容忍。 - load_delete_flag:指定该导入是否通过导入key列的方式删除数据,仅适用于UNIQUE KEY, - 导入时可不指定value列。默认为false。 - - 5. 导入数据格式样例 - - 整型类(TINYINT/SMALLINT/INT/BIGINT/LARGEINT):1, 1000, 1234 - 浮点类(FLOAT/DOUBLE/DECIMAL):1.1, 0.23, .356 - 日期类(DATE/DATETIME):2017-10-03, 2017-06-13 12:34:03。 - (注:如果是其他日期格式,可以在导入命令中,使用 strftime 或者 time_format 函数进行转换) - 字符串类(CHAR/VARCHAR):"I am a student", "a" - NULL值:\N - - 6. S3等对象存储导入参数 - fs.s3a.access.key 用户AK,必填 - fs.s3a.secret.key 用户SK,必填 - fs.s3a.endpoint 用户终端,必填 - fs.s3a.impl.disable.cache 是否启用缓存,默认true,可选 - -## example - - 1. 导入一批数据,指定超时时间和过滤比例。指定导入队列为 my_cluster。 - - LOAD LABEL example_db.label1 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - ) - PROPERTIES - ( - "cluster" = "my_cluster", - "timeout" = "3600", - "max_filter_ratio" = "0.1" - ); - - 其中 hdfs_host 为 namenode 的 host,hdfs_port 为 fs.defaultFS 端口(默认9000) - - 2. 导入一批数据,包含多个文件。导入不同的 table,指定分隔符,指定列对应关系 - - LOAD LABEL example_db.label2 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file1") - INTO TABLE `my_table_1` - COLUMNS TERMINATED BY "," - (k1, k3, k2, v1, v2), - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file2") - INTO TABLE `my_table_2` - COLUMNS TERMINATED BY "\t" - (k1, k2, k3, v2, v1) - ); - - 3. 
导入一批数据,指定hive的默认分隔符\x01,并使用通配符*指定目录下的所有文件 - - LOAD LABEL example_db.label3 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*") - NEGATIVE - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\\x01" - ); - - 4. 导入一批“负”数据 - - LOAD LABEL example_db.label4 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/old_file) - NEGATIVE - INTO TABLE `my_table` - COLUMNS TERMINATED BY "\t" - ); - - 5. 导入一批数据,指定分区 - - LOAD LABEL example_db.label5 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, k3, k2, v1, v2) - ); - - 6. 导入一批数据,指定分区, 并对导入文件的列做一些转化,如下: - 表结构为: - k1 datetime - k2 date - k3 bigint - k4 varchar(20) - k5 varchar(64) - k6 int - - 假设数据文件只有一行数据,5列,逗号分隔: - - 1537002087,2018-08-09 11:12:13,1537002087,-,1 - - 数据文件中各列,对应导入语句中指定的各列: - tmp_k1, tmp_k2, tmp_k3, k6, v1 - - 转换如下: - - 1) k1:将 tmp_k1 时间戳列转化为 datetime 类型的数据 - 2) k2:将 tmp_k2 datetime 类型的数据转化为 date 的数据 - 3) k3:将 tmp_k3 时间戳列转化为天级别时间戳 - 4) k4:指定导入默认值为1 - 5) k5:将 tmp_k1、tmp_k2、tmp_k3 列计算 md5 值 - 6) k6:将导入文件中的 - 值替换为 10 - - LOAD LABEL example_db.label6 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (tmp_k1, tmp_k2, tmp_k3, k6, v1) - SET ( - k1 = strftime("%Y-%m-%d %H:%M:%S", tmp_k1), - k2 = time_format("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", tmp_k2), - k3 = alignment_timestamp("day", tmp_k3), - k4 = default_value("1"), - k5 = md5sum(tmp_k1, tmp_k2, tmp_k3), - k6 = replace_value("-", "10") - ) - ); - - 7. 导入数据到含有HLL列的表,可以是表中的列或者数据里面的列 - - LOAD LABEL example_db.label7 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - SET ( - v1 = hll_hash(k1), - v2 = hll_hash(k2) - ) - ); - -## keyword - LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md deleted file mode 100644 index 9bbc7500b0..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/MINI LOAD.md +++ /dev/null @@ -1,139 +0,0 @@ ---- -{ - "title": "MINI LOAD", - "language": "zh-CN" -} ---- - - - -# MINI LOAD -## description - - MINI LOAD 和 STREAM LOAD 的导入实现方式完全一致。在导入功能支持上,MINI LOAD 的功能是 STREAM LOAD 的子集。 - 后续的导入新功能只会在 STREAM LOAD 中支持,MINI LOAD 将不再新增功能。建议改用 STREAM LOAD,具体使用方式请 HELP STREAM LOAD。 - - MINI LOAD 是 通过 http 协议完成的导入方式。用户可以不依赖 Hadoop,也无需通过 Mysql 客户端,即可完成导入。 - 用户通过 http 协议描述导入,数据在接受 http 请求的过程中被流式的导入 Doris , **导入作业完成后** 返回给用户导入的结果。 - - * 注:为兼容旧版本 mini load 使用习惯,用户依旧可以通过 'SHOW LOAD' 命令来查看导入结果。 - - 语法: - 导入: - - curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table}/_load?label=xxx - - 查看导入信息 - - curl -u user:passwd http://host:port/api/{db}/_load_info?label=xxx - - HTTP协议相关说明 - - 权限认证 当前 Doris 使用 http 的 Basic 方式权限认证。所以在导入的时候需要指定用户名密码 - 这种方式是明文传递密码的,暂不支持加密传输。 - - Expect Doris 需要发送过来的 http 请求带有 'Expect' 头部信息,内容为 '100-continue'。 - 为什么呢?因为我们需要将请求进行 redirect,那么必须在传输数据内容之前, - 这样可以避免造成数据的多次传输,从而提高效率。 - - Content-Length Doris 需要在发送请求时带有 'Content-Length' 这个头部信息。如果发送的内容比 - 'Content-Length' 要少,那么 Doris 认为传输出现问题,则提交此次任务失败。 - NOTE: 如果,发送的数据比 'Content-Length' 要多,那么 Doris 只读取 'Content-Length' - 长度的内容,并进行导入 - - - 参数说明: - - user: 用户如果是在default_cluster中的,user即为user_name。否则为user_name@cluster_name。 - - label: 用于指定这一批次导入的 label,用于后期进行作业查询等。 - 这个参数是必须传入的。 - - columns: 用于描述导入文件中对应的列名字。 - 如果不传入,那么认为文件中的列顺序与建表的顺序一致, - 
指定的方式为逗号分隔,例如:columns=k1,k2,k3,k4 - - column_separator: 用于指定列与列之间的分隔符,默认的为'\t' - NOTE: 需要进行url编码,譬如 - 需要指定'\t'为分隔符,那么应该传入'column_separator=%09' - 需要指定'\x01'为分隔符,那么应该传入'column_separator=%01' - 需要指定','为分隔符,那么应该传入'column_separator=%2c' - - - max_filter_ratio: 用于指定允许过滤不规范数据的最大比例,默认是0,不允许过滤 - 自定义指定应该如下:'max_filter_ratio=0.2',含义是允许20%的错误率 - - timeout: 指定 load 作业的超时时间,单位是秒。当load执行时间超过该阈值时,会自动取消。默认超时时间是 600 秒。 - 建议指定 timeout 时间小于 86400 秒。 - - hll: 用于指定数据里面和表里面的HLL列的对应关系,表中的列和数据里面指定的列 - (如果不指定columns,则数据列面的列也可以是表里面的其它非HLL列)通过","分割 - 指定多个hll列使用“:”分割,例如: 'hll1,cuid:hll2,device' - - strict_mode: 指定当前导入是否使用严格模式,默认为 false。严格模式下,非空原始数据在列类型转化后结果为 NULL 的会被过滤。 - 指定方式为 'strict_mode=true' - - NOTE: - 1. 此种导入方式当前是在一台机器上完成导入工作,因而不宜进行数据量较大的导入工作。 - 建议导入数据量不要超过 1 GB - - 2. 当前无法使用 `curl -T "{file1, file2}"` 这样的方式提交多个文件,因为curl是将其拆成多个 - 请求发送的,多个请求不能共用一个label号,所以无法使用 - - 3. mini load 的导入方式和 streaming 完全一致,都是在流式的完成导入后,同步的返回结果给用户。 - 后续查询虽可以查到 mini load 的信息,但不能对其进行操作,查询只为兼容旧的使用方式。 - - 4. 当使用 curl 命令行导入时,需要在 & 前加入 \ 转义,否则参数信息会丢失。 - -## example - - 1. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表(用户是defalut_cluster中的) - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123 - - 2. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表(用户是test_cluster中的)。超时时间是 3600 秒 - curl --location-trusted -u root@test_cluster:root -T testData http://fe.host:port/api/testDb/testTbl/_load?label=123\&timeout=3600 - - 3. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表, 允许20%的错误率(用户是defalut_cluster中的) - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 - - 4. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表, 允许20%的错误率,并且指定文件的列名(用户是defalut_cluster中的) - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2\&columns=k1,k2,k3 - - 5. 使用streaming方式导入(用户是defalut_cluster中的) - seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_load?label=123 - - 6. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列(用户是defalut_cluster中的 - - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 - \&columns=k1,k2,k3\&hll=hll_column1,k1:hll_column2,k2 - - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&max_filter_ratio=0.2 - \&hll=hll_column1,tmp_k4:hll_column2,tmp_k5\&columns=k1,k2,k3,tmp_k4,tmp_k5 - - 7. 查看提交后的导入情况 - - curl -u root http://host:port/api/testDb/_load_info?label=123 - - 8. 
指定非严格模式导入 - curl --location-trusted -u root -T testData http://host:port/api/testDb/testTbl/_load?label=123\&strict_mode=false - -## keyword - MINI, LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md deleted file mode 100644 index 526e7abb18..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/MULTI LOAD.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -{ - "title": "MULTI LOAD", - "language": "zh-CN" -} ---- - - - -# MULTI LOAD -## description - - Syntax: - curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_start?label=xxx - curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table1}/_load?label=xxx\&sub_label=yyy - curl --location-trusted -u user:passwd -T data.file http://host:port/api/{db}/{table2}/_load?label=xxx\&sub_label=zzz - curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_commit?label=xxx - curl --location-trusted -u user:passwd -XPOST http://host:port/api/{db}/_multi_desc?label=xxx - - 'MULTI LOAD'在'MINI LOAD'的基础上,可以支持用户同时向多个表进行导入,具体的命令如上面所示 - '/api/{db}/_multi_start' 开始一个多表导入任务 - '/api/{db}/{table}/_load' 向一个导入任务添加一个要导入的表,与'MINI LOAD'的主要区别是,需要传入'sub_label'参数 - '/api/{db}/_multi_commit' 提交整个多表导入任务,后台开始进行处理 - '/api/{db}/_multi_abort' 放弃一个多表导入任务 - '/api/{db}/_multi_desc' 可以展示某个多表导入任务已经提交的作业数 - - HTTP协议相关说明 - 权限认证 当前 Doris 使用http的Basic方式权限认证。所以在导入的时候需要指定用户名密码 - 这种方式是明文传递密码的,鉴于我们当前都是内网环境。。。 - - Expect Doris 需要发送过来的http请求,需要有'Expect'头部信息,内容为'100-continue' - 为什么呢?因为我们需要将请求进行redirect,那么必须在传输数据内容之前, - 这样可以避免造成数据的多次传输,从而提高效率。 - - Content-Length Doris 需要在发送请求是带有'Content-Length'这个头部信息。如果发送的内容比 - 'Content-Length'要少,那么Palo认为传输出现问题,则提交此次任务失败。 - NOTE: 如果,发送的数据比'Content-Length'要多,那么 Doris 只读取'Content-Length' - 长度的内容,并进行导入 - - 参数说明: - user: 用户如果是在default_cluster中的,user即为user_name。否则为user_name@cluster_name。 - - label: 用于指定这一批次导入的label号,用于后期进行作业状态查询等。 - 这个参数是必须传入的。 - - sub_label: 用于指定一个多表导入任务内部的子版本号。对于多表导入的load, 这个参数是必须传入的。 - - columns: 用于描述导入文件中对应的列名字。 - 如果不传入,那么认为文件中的列顺序与建表的顺序一致, - 指定的方式为逗号分隔,例如:columns=k1,k2,k3,k4 - - column_separator: 用于指定列与列之间的分隔符,默认的为'\t' - NOTE: 需要进行url编码,譬如需要指定'\t'为分隔符, - 那么应该传入'column_separator=%09' - - max_filter_ratio: 用于指定允许过滤不规范数据的最大比例,默认是0,不允许过滤 - 自定义指定应该如下:'max_filter_ratio=0.2',含义是允许20%的错误率 - 在'_multi_start'时传入有效果 - - NOTE: - 1. 此种导入方式当前是在一台机器上完成导入工作,因而不宜进行数据量较大的导入工作。 - 建议导入数据量不要超过1GB - - 2. 当前无法使用`curl -T "{file1, file2}"`这样的方式提交多个文件,因为curl是将其拆成多个 - 请求发送的,多个请求不能共用一个label号,所以无法使用 - - 3. 支持类似streaming的方式使用curl来向 Doris 中导入数据,但是,只有等这个streaming结束后 Doris - 才会发生真实的导入行为,这中方式数据量也不能过大。 - -## example - - 1. 将本地文件'testData1'中的数据导入到数据库'testDb'中'testTbl1'的表,并且 - 把'testData2'的数据导入到'testDb'中的表'testTbl2'(用户是defalut_cluster中的) - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 - curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 - curl --location-trusted -u root -T testData2 http://host:port/api/testDb/testTbl2/_load?label=123\&sub_label=2 - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_commit?label=123 - - 2. 多表导入中途放弃(用户是defalut_cluster中的) - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 - curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_abort?label=123 - - 3. 
多表导入查看已经提交多少内容(用户是defalut_cluster中的) - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_start?label=123 - curl --location-trusted -u root -T testData1 http://host:port/api/testDb/testTbl1/_load?label=123\&sub_label=1 - curl --location-trusted -u root -XPOST http://host:port/api/testDb/_multi_desc?label=123 - -## keyword - MULTI, MINI, LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md deleted file mode 100644 index 4a880957ec..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE ROUTINE LOAD.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -{ - "title": "PAUSE ROUTINE LOAD", - "language": "zh-CN" -} ---- - - - -# PAUSE ROUTINE LOAD -## example - -1. 暂停名称为 test1 的例行导入作业。 - - PAUSE ROUTINE LOAD FOR test1; - -2. 暂停所有正在运行的例行导入作业 - - PAUSE ALL ROUTINE LOAD; - -## keyword - PAUSE,ALL,ROUTINE,LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md deleted file mode 100644 index 4612bebf91..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/PAUSE SYNC JOB.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -{ - "title": "PAUSE SYNC JOB", - "language": "zh-CN" -} ---- - - - -# PAUSE SYNC JOB - -## description - -通过 `job_name` 暂停一个数据库内正在运行的常驻数据同步作业,被暂停的作业将停止同步数据,保持消费的最新位置,直到被用户恢复。 - -语法: - - PAUSE SYNC JOB [db.]job_name - -## example -1. 暂停名称为 `job_name` 的数据同步作业。 - - PAUSE SYNC JOB `job_name`; - -## keyword - PAUSE,SYNC,JOB,BINLOG \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md deleted file mode 100644 index 5afa216b60..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME ROUTINE LOAD.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -{ - "title": "RESUME ROUTINE LOAD", - "language": "zh-CN" -} ---- - - - -# RESUME ROUTINE LOAD -## example - -1. 恢复名称为 test1 的例行导入作业。 - - RESUME ROUTINE LOAD FOR test1; - -2. 恢复所有暂停中的例行导入作业。 - - RESUME ALL ROUTINE LOAD; - -## keyword - - RESUME,ALL,ROUTINE,LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md deleted file mode 100644 index 9237419443..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/RESUME SYNC JOB.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "RESUME SYNC JOB", - "language": "zh-CN" -} ---- - - - -# RESUME SYNC JOB - -## description - -通过 `job_name`恢复一个当前数据库已被暂停的常驻数据同步作业,作业将从上一次被暂停前最新的位置继续同步数据。 - -语法: - - RESUME SYNC JOB [db.]job_name - -## example - -1. 
恢复名称为 `job_name` 的数据同步作业 - - RESUME SYNC JOB `job_name`; - -## keyword - RESUME,SYNC,JOB,BINLOG \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md deleted file mode 100644 index 3e61aab9d3..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/ROUTINE LOAD.md +++ /dev/null @@ -1,544 +0,0 @@ ---- -{ - "title": "ROUTINE LOAD", - "language": "zh-CN" -} ---- - - - -# ROUTINE LOAD -## description - - 例行导入(Routine Load)功能,支持用户提交一个常驻的导入任务,通过不断的从指定的数据源读取数据,将数据导入到 Doris 中。 - 目前仅支持通过无认证或者 SSL 认证方式,从 Kakfa 导入文本格式(CSV)的数据。 - -语法: - - CREATE ROUTINE LOAD [db.]job_name ON tbl_name - [merge_type] - [load_properties] - [job_properties] - FROM data_source - [data_source_properties] - - 1. [db.]job_name - - 导入作业的名称,在同一个 database 内,相同名称只能有一个 job 在运行。 - - 2. tbl_name - - 指定需要导入的表的名称。 - - 3. merge_type - 数据的合并类型,一共支持三种类型APPEND、DELETE、MERGE 其中,APPEND是默认值,表示这批数据全部需要追加到现有数据中,DELETE 表示删除与这批数据key相同的所有行,MERGE 语义 需要与delete on条件联合使用,表示满足delete 条件的数据按照DELETE 语义处理其余的按照APPEND 语义处理, 语法为[WITH MERGE|APPEND|DELETE] - - 4. load_properties - - 用于描述导入数据。语法: - [column_separator], - [columns_mapping], - [where_predicates], - [delete_on_predicates], - [source_sequence], - [partitions], - [preceding_predicates] - - 1. column_separator: - - 指定列分隔符,如: - - COLUMNS TERMINATED BY "," - - 默认为:\t - - 2. columns_mapping: - - 指定源数据中列的映射关系,以及定义衍生列的生成方式。 - - 1. 映射列: - - 按顺序指定,源数据中各个列,对应目的表中的哪些列。对于希望跳过的列,可以指定一个不存在的列名。 - 假设目的表有三列 k1, k2, v1。源数据有4列,其中第1、2、4列分别对应 k2, k1, v1。则书写如下: - - COLUMNS (k2, k1, xxx, v1) - - 其中 xxx 为不存在的一列,用于跳过源数据中的第三列。 - - 2. 衍生列: - - 以 col_name = expr 的形式表示的列,我们称为衍生列。即支持通过 expr 计算得出目的表中对应列的值。 - 衍生列通常排列在映射列之后,虽然这不是强制的规定,但是 Doris 总是先解析映射列,再解析衍生列。 - 接上一个示例,假设目的表还有第4列 v2,v2 由 k1 和 k2 的和产生。则可以书写如下: - - COLUMNS (k2, k1, xxx, v1, v2 = k1 + k2); - - 3. where_predicates - - 用于指定过滤条件,以过滤掉不需要的列。过滤列可以是映射列或衍生列。 - 例如我们只希望导入 k1 大于 100 并且 k2 等于 1000 的列,则书写如下: - - WHERE k1 > 100 and k2 = 1000 - - 4. partitions - - 指定导入目的表的哪些 partition 中。如果不指定,则会自动导入到对应的 partition 中。 - 示例: - - PARTITION(p1, p2, p3) - - 5. delete_on_predicates - 表示删除条件,仅在 merge type 为MERGE 时有意义,语法与where 相同 - - 6. source_sequence: - - 只适用于UNIQUE_KEYS,相同key列下,保证value列按照source_sequence列进行REPLACE, source_sequence可以是数据源中的列,也可以是表结构中的一列。 - - 7. preceding_predicates - - PRECEDING FILTER predicate - - 用于过滤原始数据。原始数据是未经列映射、转换的数据。用户可以在对转换前的数据前进行一次过滤,选取期望的数据,再进行转换。 - - 5. job_properties - - 用于指定例行导入作业的通用参数。 - 语法: - - PROPERTIES ( - "key1" = "val1", - "key2" = "val2" - ) - - 目前我们支持以下参数: - - 1. desired_concurrent_number - - 期望的并发度。一个例行导入作业会被分成多个子任务执行。这个参数指定一个作业最多有多少任务可以同时执行。必须大于0。默认为3。 - 这个并发度并不是实际的并发度,实际的并发度,会通过集群的节点数、负载情况,以及数据源的情况综合考虑。 - 例: - - "desired_concurrent_number" = "3" - - 2. max_batch_interval/max_batch_rows/max_batch_size - - 这三个参数分别表示: - 1)每个子任务最大执行时间,单位是秒。范围为 5 到 60。默认为10。 - 2)每个子任务最多读取的行数。必须大于等于200000。默认是200000。 - 3)每个子任务最多读取的字节数。单位是字节,范围是 100MB 到 1GB。默认是 100MB。 - - 这三个参数,用于控制一个子任务的执行时间和处理量。当任意一个达到阈值,则任务结束。 - 例: - - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200" - - 3. max_error_number - - 采样窗口内,允许的最大错误行数。必须大于等于0。默认是 0,即不允许有错误行。 - 采样窗口为 max_batch_rows * 10。即如果在采样窗口内,错误行数大于 max_error_number,则会导致例行作业被暂停,需要人工介入检查数据质量问题。 - 被 where 条件过滤掉的行不算错误行。 - - 4. strict_mode - - 是否开启严格模式,默认为关闭。如果开启后,非空原始数据的列类型变换如果结果为 NULL,则会被过滤。指定方式为 "strict_mode" = "true" - - 5. timezone - - 指定导入作业所使用的时区。默认为使用 Session 的 timezone 参数。该参数会影响所有导入涉及的和时区有关的函数结果。 - - 6. 
format - - 指定导入数据格式,默认是csv,支持json格式。 - - 7. jsonpaths - - jsonpaths: 导入json方式分为:简单模式和匹配模式。如果设置了jsonpath则为匹配模式导入,否则为简单模式导入,具体可参考示例。 - - 8. strip_outer_array - - 布尔类型,为true表示json数据以数组对象开始且将数组对象中进行展平,默认值是false。 - - 9. json_root - - json_root为合法的jsonpath字符串,用于指定json document的根节点,默认值为""。 - - 10. send_batch_parallelism - - 整型,用于设置发送批处理数据的并行度,如果并行度的值超过 BE 配置中的 `max_send_batch_parallelism_per_job`,那么作为协调点的 BE 将使用 `max_send_batch_parallelism_per_job` 的值。 - - 11. load_to_single_tablet - - 布尔类型,为true表示支持一个任务只导入数据到对应分区的一个tablet,默认值为false,该参数只允许在对带有random分区的olap表导数的时候设置。 - - 6. data_source - - 数据源的类型。当前支持: - - KAFKA - - 7. data_source_properties - - 指定数据源相关的信息。 - 语法: - - ( - "key1" = "val1", - "key2" = "val2" - ) - - 1. KAFKA 数据源 - - 1. kafka_broker_list - - Kafka 的 broker 连接信息。格式为 ip:host。多个broker之间以逗号分隔。 - 示例: - - "kafka_broker_list" = "broker1:9092,broker2:9092" - - 2. kafka_topic - - 指定要订阅的 Kafka 的 topic。 - 示例: - - "kafka_topic" = "my_topic" - - 3. kafka_partitions/kafka_offsets - - 指定需要订阅的 kafka partition,以及对应的每个 partition 的起始 offset。 - - offset 可以指定从大于等于 0 的具体 offset,或者: - 1) OFFSET_BEGINNING: 从有数据的位置开始订阅。 - 2) OFFSET_END: 从末尾开始订阅。 - 3) 时间戳,格式必须如:"2021-05-11 10:00:00",系统会自动定位到大于等于该时间戳的第一个消息的offset。注意,时间戳格式的offset不能和数字类型混用,只能选其一。 - - 如果没有指定,则默认从 OFFSET_END 开始订阅 topic 下的所有 partition。 - 示例: - - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,OFFSET_BEGINNING,OFFSET_END" - - "kafka_partitions" = "0,1", - "kafka_offsets" = "2021-05-11 10:00:00, 2021-05-11 11:00:00" - - 4. property - - 指定自定义kafka参数。 - 功能等同于kafka shell中 "--property" 参数。 - 当参数的 value 为一个文件时,需要在 value 前加上关键词:"FILE:"。 - 关于如何创建文件,请参阅 "HELP CREATE FILE;" - 更多支持的自定义参数,请参阅 librdkafka 的官方 CONFIGURATION 文档中,client 端的配置项。 - - 示例: - "property.client.id" = "12345", - "property.ssl.ca.location" = "FILE:ca.pem" - - 1.使用 SSL 连接 Kafka 时,需要指定以下参数: - - "property.security.protocol" = "ssl", - "property.ssl.ca.location" = "FILE:ca.pem", - "property.ssl.certificate.location" = "FILE:client.pem", - "property.ssl.key.location" = "FILE:client.key", - "property.ssl.key.password" = "abcdefg" - - 其中: - "property.security.protocol" 和 "property.ssl.ca.location" 为必须,用于指明连接方式为 SSL,以及 CA 证书的位置。 - - 如果 Kafka server 端开启了 client 认证,则还需设置: - - "property.ssl.certificate.location" - "property.ssl.key.location" - "property.ssl.key.password" - - 分别用于指定 client 的 public key,private key 以及 private key 的密码。 - - - 2.指定kafka partition的默认起始offset - 如果没有指定kafka_partitions/kafka_offsets,默认消费所有分区,此时可以指定kafka_default_offsets指定起始 offset。默认为 OFFSET_END,即从末尾开始订阅。 - 值为 - 1) OFFSET_BEGINNING: 从有数据的位置开始订阅。 - 2) OFFSET_END: 从末尾开始订阅。 - 3) 时间戳,格式同 kafka_offsets - - 示例: - "property.kafka_default_offsets" = "OFFSET_BEGINNING" - "property.kafka_default_offsets" = "2021-05-11 10:00:00" - - 8. 导入数据格式样例 - - 整型类(TINYINT/SMALLINT/INT/BIGINT/LARGEINT):1, 1000, 1234 - 浮点类(FLOAT/DOUBLE/DECIMAL):1.1, 0.23, .356 - 日期类(DATE/DATETIME):2017-10-03, 2017-06-13 12:34:03。 - 字符串类(CHAR/VARCHAR)(无引号):I am a student, a - NULL值:\N - -## example - 1. 
为 example_db 的 example_tbl 创建一个名为 test1 的 Kafka 例行导入任务。指定列分隔符和 group.id 和 client.id,并且自动默认消费所有分区,且从有数据的位置(OFFSET_BEGINNING)开始订阅 - - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS TERMINATED BY ",", - COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100) - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "property.group.id" = "xxx", - "property.client.id" = "xxx", - "property.kafka_default_offsets" = "OFFSET_BEGINNING" - ); - - 2. 为 example_db 的 example_tbl 创建一个名为 test1 的 Kafka 例行导入任务。导入任务为严格模式。 - - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100), - WHERE k1 > 100 and k2 like "%doris%" - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,0,200" - ); - - 3. 通过 SSL 认证方式,从 Kafka 集群导入数据。同时设置 client.id 参数。导入任务为非严格模式,时区为 Africa/Abidjan - - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(k1, k2, k3, v1, v2, v3 = k1 * 100), - WHERE k1 > 100 and k2 like "%doris%" - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false", - "timezone" = "Africa/Abidjan" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "property.security.protocol" = "ssl", - "property.ssl.ca.location" = "FILE:ca.pem", - "property.ssl.certificate.location" = "FILE:client.pem", - "property.ssl.key.location" = "FILE:client.key", - "property.ssl.key.password" = "abcdefg", - "property.client.id" = "my_client_id" - ); - 4. 简单模式导入json - CREATE ROUTINE LOAD example_db.test_json_label_1 ON table1 - COLUMNS(category,price,author) - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false", - "format" = "json" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2", - "kafka_offsets" = "0,0,0" - ); - 支持两种json数据格式: - 1){"category":"a9jadhx","author":"test","price":895} - 2)[ - {"category":"a9jadhx","author":"test","price":895}, - {"category":"axdfa1","author":"EvelynWaugh","price":1299} - ] - - 5. 
精准导入json数据格式 - CREATE TABLE `example_tbl` ( - `category` varchar(24) NULL COMMENT "", - `author` varchar(24) NULL COMMENT "", - `timestamp` bigint(20) NULL COMMENT "", - `dt` int(11) NULL COMMENT "", - `price` double REPLACE - ) ENGINE=OLAP - AGGREGATE KEY(`category`,`author`,`timestamp`,`dt`) - COMMENT "OLAP" - PARTITION BY RANGE(`dt`) - (PARTITION p0 VALUES [("-2147483648"), ("20200509")), - PARTITION p20200509 VALUES [("20200509"), ("20200510")), - PARTITION p20200510 VALUES [("20200510"), ("20200511")), - PARTITION p20200511 VALUES [("20200511"), ("20200512"))) - DISTRIBUTED BY HASH(`category`,`author`,`timestamp`) BUCKETS 4 - PROPERTIES ( - "replication_num" = "1" - ); - - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(category, author, price, timestamp, dt=from_unixtime(timestamp, '%Y%m%d')) - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false", - "format" = "json", - "jsonpaths" = "[\"$.category\",\"$.author\",\"$.price\",\"$.timestamp\"]", - "strip_outer_array" = "true" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2", - "kafka_offsets" = "0,0,0" - ); - json数据格式: - [ - {"category":"11","title":"SayingsoftheCentury","price":895,"timestamp":1589191587}, - {"category":"22","author":"2avc","price":895,"timestamp":1589191487}, - {"category":"33","author":"3avc","title":"SayingsoftheCentury","timestamp":1589191387} - ] - 说明: - 1)如果json数据是以数组开始,并且数组中每个对象是一条记录,则需要将strip_outer_array设置成true,表示展平数组。 - 2)如果json数据是以数组开始,并且数组中每个对象是一条记录,在设置jsonpath时,我们的ROOT节点实际上是数组中对象。 - - 6. 用户指定根节点json_root - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - COLUMNS(category, author, price, timestamp, dt=from_unixtime(timestamp, '%Y%m%d')) - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false", - "format" = "json", - "jsonpaths" = "[\"$.category\",\"$.author\",\"$.price\",\"$.timestamp\"]", - "strip_outer_array" = "true", - "json_root" = "$.RECORDS" - ) - FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2", - "kafka_offsets" = "0,0,0" - ); - json数据格式: - { - "RECORDS":[ - {"category":"11","title":"SayingsoftheCentury","price":895,"timestamp":1589191587}, - {"category":"22","author":"2avc","price":895,"timestamp":1589191487}, - {"category":"33","author":"3avc","title":"SayingsoftheCentury","timestamp":1589191387} - ] - } - - 7. 为 example_db 的 example_tbl 创建一个名为 test1 的 Kafka 例行导入任务。并且删除与v3 >100 行相匹配的key列的行 - - CREATE ROUTINE LOAD example_db.test1 ON example_tbl - WITH MERGE - COLUMNS(k1, k2, k3, v1, v2, v3), - WHERE k1 > 100 and k2 like "%doris%", - DELETE ON v3 >100 - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "20", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200", - "strict_mode" = "false" - ) - FROM KAFKA - - 8. 
导入数据到含有sequence列的UNIQUE_KEYS表中 - CREATE ROUTINE LOAD example_db.test_job ON example_tbl - COLUMNS TERMINATED BY ",", - COLUMNS(k1,k2,source_sequence,v1,v2), - ORDER BY source_sequence - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "30", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200" - ) FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,0,200" - ); - - 8. 过滤原始数据 - - CREATE ROUTINE LOAD example_db.test_job ON example_tbl - COLUMNS TERMINATED BY ",", - COLUMNS(k1,k2,source_sequence,v1,v2), - PRECEDING FILTER k1 > 2 - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "30", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200" - ) FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "kafka_partitions" = "0,1,2,3", - "kafka_offsets" = "101,0,0,200" - ); - - 9. 从指定的时间点开始消费 - - CREATE ROUTINE LOAD example_db.test_job ON example_tbl - PROPERTIES - ( - "desired_concurrent_number"="3", - "max_batch_interval" = "30", - "max_batch_rows" = "300000", - "max_batch_size" = "209715200" - ) FROM KAFKA - ( - "kafka_broker_list" = "broker1:9092,broker2:9092,broker3:9092", - "kafka_topic" = "my_topic", - "property.kafka_default_offsets" = "2021-10-10 11:00:00" - ); - -## keyword - - CREATE,ROUTINE,LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md deleted file mode 100644 index 4e094eaf4a..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ALTER.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -{ - "title": "SHOW ALTER", - "language": "zh-CN" -} ---- - - - -# SHOW ALTER -## description - 该语句用于展示当前正在进行的各类修改任务的执行情况 - 语法: - SHOW ALTER [CLUSTER | TABLE [COLUMN | ROLLUP] [FROM db_name]]; - - 说明: - TABLE COLUMN:展示修改列的 ALTER 任务 - 支持语法[WHERE TableName|CreateTime|FinishTime|State] [ORDER BY] [LIMIT] - TABLE ROLLUP:展示创建或删除 ROLLUP index 的任务 - 如果不指定 db_name,使用当前默认 db - CLUSTER: 展示集群操作相关任务情况(仅管理员使用!待实现...) - -## example - 1. 展示默认 db 的所有修改列的任务执行情况 - SHOW ALTER TABLE COLUMN; - - 2. 展示某个表最近一次修改列的任务执行情况 - SHOW ALTER TABLE COLUMN WHERE TableName = "table1" ORDER BY CreateTime DESC LIMIT 1; - - 3. 展示指定 db 的创建或删除 ROLLUP index 的任务执行情况 - SHOW ALTER TABLE ROLLUP FROM example_db; - - 4. 展示集群操作相关任务(仅管理员使用!待实现...) - SHOW ALTER CLUSTER; - -## keyword - SHOW,ALTER - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md deleted file mode 100644 index f636b71512..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW BACKUP.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -{ - "title": "SHOW BACKUP", - "language": "zh-CN" -} ---- - - - -# SHOW BACKUP -## description - 该语句用于查看 BACKUP 任务 - 语法: - SHOW BACKUP [FROM db_name] - - 说明: - 1. Palo 中仅保存最近一次 BACKUP 任务。 - 2. 
各列含义如下: - JobId: 唯一作业id - SnapshotName: 备份的名称 - DbName: 所属数据库 - State: 当前阶段 - PENDING: 提交作业后的初始状态 - SNAPSHOTING: 执行快照中 - UPLOAD_SNAPSHOT:快照完成,准备上传 - UPLOADING: 快照上传中 - SAVE_META: 将作业元信息保存为本地文件 - UPLOAD_INFO: 上传作业元信息 - FINISHED: 作业成功 - CANCELLED: 作业失败 - BackupObjs: 备份的表和分区 - CreateTime: 任务提交时间 - SnapshotFinishedTime: 快照完成时间 - UploadFinishedTime: 快照上传完成时间 - FinishedTime: 作业结束时间 - UnfinishedTasks: 在 SNAPSHOTING 和 UPLOADING 阶段会显示还未完成的子任务id - Status: 如果作业失败,显示失败信息 - Timeout: 作业超时时间,单位秒 - -## example - 1. 查看 example_db 下最后一次 BACKUP 任务。 - SHOW BACKUP FROM example_db; - -## keyword - SHOW, BACKUP - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md deleted file mode 100644 index cacc894262..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW CREATE FUNCTION.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -{ - "title": "SHOW CREATE FUNCTION", - "language": "zh-CN" -} ---- - - - -# SHOW CREATE FUNCTION -## description - 该语句用于展示用户自定义函数的创建语句 - 语法: - SHOW CREATE FUNTION function_name(arg_type [, ...]) [FROM db_name]]; - - 说明: - `function_name`: 要展示的函数名称 - `arg_type`: 要展示的函数的参数列表 - 如果不指定 db_name,使用当前默认 db - -## example - 1. 展示默认db下指定函数的创建语句 - SHOW CREATE FUNCTION my_add(INT, INT) - -## keyword - SHOW,CREATE,FUNCTION \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md deleted file mode 100644 index 4809962029..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW CREATE ROUTINE LOAD.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ -"title": "SHOW CREATE ROUTINE LOAD", -"language": "zh-CN" -} ---- - - - -# SHOW CREATE ROUTINE LOAD -## description - 该语句用于展示例行导入作业的创建语句。 - 结果中的 kafka partition 和 offset 展示的当前消费的 partition,以及对应的待消费的 offset。 - - 语法: - SHOW [ALL] CREATE ROUTINE LOAD for load_name; - - 说明: - `ALL`: 可选参数,代表获取所有作业,包括历史作业 - `load_name`: 例行导入作业名称 - -## example - 1. 展示默认db下指定例行导入作业的创建语句 - SHOW CREATE ROUTINE LOAD for test_load - -## keyword - SHOW,CREATE,ROUTINE,LOAD diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md deleted file mode 100644 index 75f93aae37..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATA SKEW.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -{ - "title": "SHOW DATA SKEW", - "language": "zh-CN" -} ---- - - - -# SHOW DATA SKEW -## description - - 该语句用于查看表或某个分区的数据倾斜情况。 - - 语法: - - SHOW DATA SKEW FROM [db_name.]tbl_name PARTITION (partition_name); - - 说明: - - 1. 必须指定且仅指定一个分区。对于非分区表,分区名称同表名。 - 2. 结果将展示指定分区下,各个分桶的数据量,以及每个分桶数据量在总数据量中的占比。 - -## example - - 1. 查看表的数据倾斜情况 - - SHOW DATA SKEW FROM db1.test PARTITION(p1); - -## keyword - - SHOW,DATA,SKEW - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md deleted file mode 100644 index e371c0e51c..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATA.md +++ /dev/null @@ -1,113 +0,0 @@ ---- -{ - "title": "SHOW DATA", - "language": "zh-CN" -} ---- - - - -# SHOW DATA - -## description - -该语句用于展示数据量、副本数量以及统计行数。 - -语法: - -``` -SHOW DATA [FROM db_name[.table_name]] [ORDER BY ...]; -``` - -说明: - -1. 
如果不指定 FROM 子句,则展示当前 db 下细分到各个 table 的数据量和副本数量。其中数据量为所有副本的总数据量。而副本数量为表的所有分区以及所有物化视图的副本数量。 - -2. 如果指定 FROM 子句,则展示 table 下细分到各个物化视图的数据量、副本数量和统计行数。其中数据量为所有副本的总数据量。副本数量为对应物化视图的所有分区的副本数量。统计行数为对应物化视图的所有分区统计行数。 - -3. 统计行数时,以多个副本中,行数最大的那个副本为准。 - -4. 结果集中的 `Total` 行表示汇总行。`Quota` 行表示当前数据库设置的配额。`Left` 行表示剩余配额。 - -5. 如果想查看各个 Partition 的大小,请参阅 `help show partitions`。 - -6. 可以使用 ORDER BY 对任意列组合进行排序。 - -## example - -1. 展示默认 db 的各个 table 的数据量,副本数量,汇总数据量和汇总副本数量。 - - ``` - SHOW DATA; - ``` - - ``` - +-----------+-------------+--------------+ - | TableName | Size | ReplicaCount | - +-----------+-------------+--------------+ - | tbl1 | 900.000 B | 6 | - | tbl2 | 500.000 B | 3 | - | Total | 1.400 KB | 9 | - | Quota | 1024.000 GB | 1073741824 | - | Left | 1021.921 GB | 1073741815 | - +-----------+-------------+--------------+ - ``` - -2. 展示指定 db 的下指定表的细分数据量、副本数量和统计行数 - - ``` - SHOW DATA FROM example_db.test; - ``` - - ``` - +-----------+-----------+-----------+--------------+----------+ - | TableName | IndexName | Size | ReplicaCount | RowCount | - +-----------+-----------+-----------+--------------+----------+ - | test | r1 | 10.000MB | 30 | 10000 | - | | r2 | 20.000MB | 30 | 20000 | - | | test2 | 50.000MB | 30 | 50000 | - | | Total | 80.000 | 90 | | - +-----------+-----------+-----------+--------------+----------+ - ``` - -3. 可以按照数据量、副本数量、统计行数等进行组合排序 - - ``` - SHOW DATA ORDER BY ReplicaCount desc,Size asc; - ``` - - ``` - +-----------+-------------+--------------+ - | TableName | Size | ReplicaCount | - +-----------+-------------+--------------+ - | table_c | 3.102 KB | 40 | - | table_d | .000 | 20 | - | table_b | 324.000 B | 20 | - | table_a | 1.266 KB | 10 | - | Total | 4.684 KB | 90 | - | Quota | 1024.000 GB | 1073741824 | - | Left | 1024.000 GB | 1073741734 | - +-----------+-------------+--------------+ - ``` - -## keyword - - SHOW,DATA - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md deleted file mode 100644 index 61ae6a1ad5..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATABASE ID.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "SHOW DATABASE", - "language": "zh-CN" -} ---- - - - -# SHOW DATABASE ID -## description - 该语句用于根据 database id 查找对应的 database name(仅管理员使用) - 语法: - SHOW DATABASE [database_id] - -## example - 1. 根据 database id 查找对应的 database name - SHOW DATABASE 1001; - -## keyword - SHOW,DATABASE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md deleted file mode 100644 index 203bfda855..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DATABASES.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -{ - "title": "SHOW DATABASES", - "language": "zh-CN" -} ---- - - - -# SHOW DATABASES -## description - 该语句用于展示当前可见的 db - 语法: - SHOW DATABASES; - -## keyword - SHOW,DATABASES - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md deleted file mode 100644 index b6a653f0f3..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DELETE.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "SHOW DELETE", - "language": "zh-CN" -} ---- - - - -# SHOW DELETE -## description - 该语句用于展示已执行成功的历史 delete 任务 - 语法: - SHOW DELETE [FROM db_name] - -## example - 1. 
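The SHOW DATABASES statement above has no example section; a small usage sketch that chains it with USE and SHOW TABLES (both appear elsewhere in this section), using an illustrative database name:

```
SHOW DATABASES;
USE example_db;   -- example_db is illustrative
SHOW TABLES;
```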
展示数据库 database 的所有历史 delete 任务 - SHOW DELETE FROM database; - -## keyword - SHOW,DELETE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md deleted file mode 100644 index eac7a25060..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW DYNAMIC PARTITION TABLES.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "SHOW DYNAMIC PARTITION TABLES", - "language": "zh-CN" -} ---- - - - -# SHOW DYNAMIC PARTITION TABLES -## description - 该语句用于展示当前db下所有的动态分区表状态 - 语法: - SHOW DYNAMIC PARTITION TABLES [FROM db_name]; - -## example - 1. 展示数据库 database 的所有动态分区表状态 - SHOW DYNAMIC PARTITION TABLES FROM database; - -## keyword - SHOW,DYNAMIC,PARTITION,TABLES - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md deleted file mode 100644 index c29b9762cd..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW EXPORT.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -{ - "title": "SHOW EXPORT", - "language": "zh-CN" -} ---- - - - -# SHOW EXPORT -## description - 该语句用于展示指定的导出任务的执行情况 - 语法: - SHOW EXPORT - [FROM db_name] - [ - WHERE - [ID = your_job_id] - [STATE = ["PENDING"|"EXPORTING"|"FINISHED"|"CANCELLED"]] - [LABEL [ = "your_label" | LIKE "label_matcher"]] - ] - [ORDER BY ...] - [LIMIT limit]; - - 说明: - 1) 如果不指定 db_name,使用当前默认db - 2) 如果指定了 STATE,则匹配 EXPORT 状态 - 3) 可以使用 ORDER BY 对任意列组合进行排序 - 4) 如果指定了 LIMIT,则显示 limit 条匹配记录。否则全部显示 - -## example - 1. 展示默认 db 的所有导出任务 - SHOW EXPORT; - - 2. 展示指定 db 的导出任务,按 StartTime 降序排序 - SHOW EXPORT FROM example_db ORDER BY StartTime DESC; - - 3. 展示指定 db 的导出任务,state 为 "exporting", 并按 StartTime 降序排序 - SHOW EXPORT FROM example_db WHERE STATE = "exporting" ORDER BY StartTime DESC; - - 4. 展示指定 db,指定 job_id 的导出任务 - SHOW EXPORT FROM example_db WHERE ID = job_id; - - 5. 展示指定 db,指定 label 的导出任务 - SHOW EXPORT FROM example_db WHERE LABEL = "mylabel"; - - 6. 展示指定 db,label 中前缀是 labelprefix 的导出任务 - SHOW EXPORT FROM example_db WHERE LABEL LIKE "labelprefix%"; - -## keyword - SHOW,EXPORT - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md deleted file mode 100644 index cbdced1ccb..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW LOAD.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -{ - "title": "SHOW LOAD", - "language": "zh-CN" -} ---- - - - -# SHOW LOAD -## description - 该语句用于展示指定的导入任务的执行情况 - 语法: - SHOW LOAD - [FROM db_name] - [ - WHERE - [LABEL [ = "your_label" | LIKE "label_matcher"]] - [STATE = ["PENDING"|"ETL"|"LOADING"|"FINISHED"|"CANCELLED"|]] - ] - [ORDER BY ...] - [LIMIT limit][OFFSET offset]; - - 说明: - 1) 如果不指定 db_name,使用当前默认db - 2) 如果使用 LABEL LIKE,则会匹配导入任务的 label 包含 label_matcher 的导入任务 - 3) 如果使用 LABEL = ,则精确匹配指定的 label - 4) 如果指定了 STATE,则匹配 LOAD 状态 - 5) 可以使用 ORDER BY 对任意列组合进行排序 - 6) 如果指定了 LIMIT,则显示 limit 条匹配记录。否则全部显示 - 7) 如果指定了 OFFSET,则从偏移量offset开始显示查询结果。默认情况下偏移量为0。 - 8) 如果是使用 broker/mini load,则 URL 列中的连接可以使用以下命令查看: - - SHOW LOAD WARNINGS ON 'url' - -## example - 1. 展示默认 db 的所有导入任务 - SHOW LOAD; - - 2. 展示指定 db 的导入任务,label 中包含字符串 "2014_01_02",展示最老的10个 - SHOW LOAD FROM example_db WHERE LABEL LIKE "2014_01_02" LIMIT 10; - - 3. 
展示指定 db 的导入任务,指定 label 为 "load_example_db_20140102" 并按 LoadStartTime 降序排序 - SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" ORDER BY LoadStartTime DESC; - - 4. 展示指定 db 的导入任务,指定 label 为 "load_example_db_20140102" ,state 为 "loading", 并按 LoadStartTime 降序排序 - SHOW LOAD FROM example_db WHERE LABEL = "load_example_db_20140102" AND STATE = "loading" ORDER BY LoadStartTime DESC; - - 5. 展示指定 db 的导入任务 并按 LoadStartTime 降序排序,并从偏移量5开始显示10条查询结果 - SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 5,10; - SHOW LOAD FROM example_db ORDER BY LoadStartTime DESC limit 10 offset 5; - - 6. 小批量导入是查看导入状态的命令 - curl --location-trusted -u {user}:{passwd} http://{hostname}:{port}/api/{database}/_load_info?label={labelname} - -## keyword - SHOW,LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md deleted file mode 100644 index 0a618af98d..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PARTITION ID.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "SHOW PARTITION", - "language": "zh-CN" -} ---- - - - -# SHOW PARTITION ID -## description - 该语句用于根据 partition id 查找对应的 database name, table name, partition name(仅管理员使用) - 语法: - SHOW PARTITION [partition_id] - -## example - 1. 根据 partition id 查找对应的 database name, table name, partition name - SHOW PARTITION 10002; - -## keyword - SHOW,PARTITION - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md deleted file mode 100644 index c34eaaaf5e..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PARTITIONS.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "SHOW PARTITIONS", - "language": "zh-CN" -} ---- - - - -# SHOW PARTITIONS -## description - 该语句用于展示分区信息 - 语法: - SHOW [TEMPORARY] PARTITIONS FROM [db_name.]table_name [WHERE] [ORDER BY] [LIMIT]; - 说明: - 支持PartitionId,PartitionName,State,Buckets,ReplicationNum,LastConsistencyCheckTime等列的过滤 - TEMPORARY指定列出临时分区 - -## example - 1.展示指定db下指定表的所有非临时分区信息 - SHOW PARTITIONS FROM example_db.table_name; - - 2.展示指定db下指定表的所有临时分区信息 - SHOW TEMPORARY PARTITIONS FROM example_db.table_name; - - 3.展示指定db下指定表的指定非临时分区的信息 - SHOW PARTITIONS FROM example_db.table_name WHERE PartitionName = "p1"; - - 4.展示指定db下指定表的最新非临时分区的信息 - SHOW PARTITIONS FROM example_db.table_name ORDER BY PartitionId DESC LIMIT 1; -## keyword - SHOW,PARTITIONS diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md deleted file mode 100644 index 479e2b01fd..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW PROPERTY.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -{ - "title": "SHOW PROPERTY", - "language": "zh-CN" -} ---- - - - -# SHOW PROPERTY -## description - 该语句用于查看用户的属性 - 语法: - SHOW PROPERTY [FOR user] [LIKE key] - -## example - 1. 查看 jack 用户的属性 - SHOW PROPERTY FOR 'jack' - - 2. 
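The SHOW PARTITIONS description above lists State, Buckets and ReplicationNum among the filterable columns. A sketch of such a filter, assuming these columns accept the same WHERE / ORDER BY / LIMIT forms as PartitionName and PartitionId in the examples:

```
SHOW PARTITIONS FROM example_db.table_name
WHERE State = "NORMAL" ORDER BY Buckets DESC LIMIT 10;
```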
查看 jack 用户导入cluster相关属性 - SHOW PROPERTY FOR 'jack' LIKE '%load_cluster%' - -## keyword - SHOW, PROPERTY - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md deleted file mode 100644 index 2d49ff89ff..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW REPOSITORIES.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "SHOW REPOSITORIES", - "language": "zh-CN" -} ---- - - - -# SHOW REPOSITORIES -## description - 该语句用于查看当前已创建的仓库。 - 语法: - SHOW REPOSITORIES; - - 说明: - 1. 各列含义如下: - RepoId: 唯一的仓库ID - RepoName: 仓库名称 - CreateTime: 第一次创建该仓库的时间 - IsReadOnly: 是否为只读仓库 - Location: 仓库中用于备份数据的根目录 - Broker: 依赖的 Broker - ErrMsg: Palo 会定期检查仓库的连通性,如果出现问题,这里会显示错误信息 - -## example - 1. 查看已创建的仓库: - SHOW REPOSITORIES; - -## keyword - SHOW, REPOSITORY, REPOSITORIES - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md deleted file mode 100644 index 5be532f27c..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW RESTORE.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -{ - "title": "SHOW RESTORE", - "language": "zh-CN" -} ---- - - - -# SHOW RESTORE -## description - 该语句用于查看 RESTORE 任务 - 语法: - SHOW RESTORE [FROM db_name] - - 说明: - 1. Palo 中仅保存最近一次 RESTORE 任务。 - 2. 各列含义如下: - JobId: 唯一作业id - Label: 要恢复的备份的名称 - Timestamp: 要恢复的备份的时间版本 - DbName: 所属数据库 - State: 当前阶段 - PENDING: 提交作业后的初始状态 - SNAPSHOTING: 执行快照中 - DOWNLOAD: 快照完成,准备下载仓库中的快照 - DOWNLOADING: 快照下载中 - COMMIT: 快照下载完成,准备生效 - COMMITING: 生效中 - FINISHED: 作业成功 - CANCELLED: 作业失败 - AllowLoad: 恢复时是否允许导入(当前不支持) - ReplicationNum: 指定恢复的副本数 - RestoreJobs: 要恢复的表和分区 - CreateTime: 任务提交时间 - MetaPreparedTime: 元数据准备完成时间 - SnapshotFinishedTime: 快照完成时间 - DownloadFinishedTime: 快照下载完成时间 - FinishedTime: 作业结束时间 - UnfinishedTasks: 在 SNAPSHOTING、DOWNLOADING 和 COMMITING 阶段会显示还未完成的子任务id - Status: 如果作业失败,显示失败信息 - Timeout: 作业超时时间,单位秒 - -## example - 1. 查看 example_db 下最近一次 RESTORE 任务。 - SHOW RESTORE FROM example_db; - -## keyword - SHOW, RESTORE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md deleted file mode 100644 index 2e5c762354..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD TASK.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -{ - "title": "SHOW ROUTINE LOAD TASK", - "language": "zh-CN" -} ---- - - - -# SHOW ROUTINE LOAD TASK -## example - -1. 
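A restore can be inspected in two steps: the backups available in a repository via SHOW SNAPSHOT (documented later in this section) and the running job via SHOW RESTORE (described above). A combined sketch reusing the names from the examples:

```
SHOW SNAPSHOT ON example_repo;
SHOW RESTORE FROM example_db;
```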
展示名为 test1 的例行导入任务的子任务信息。 - - SHOW ROUTINE LOAD TASK WHERE JobName = "test1"; - -## keyword - SHOW,ROUTINE,LOAD,TASK diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md deleted file mode 100644 index 051c5a88ae..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW ROUTINE LOAD.md +++ /dev/null @@ -1,108 +0,0 @@ ---- -{ - "title": "SHOW ROUTINE LOAD", - "language": "zh-CN" -} ---- - - - -# SHOW ROUTINE LOAD -## description - 该语句用于展示 Routine Load 作业运行状态 - 语法: - SHOW [ALL] ROUTINE LOAD [FOR jobName] [LIKE pattern]; - - 结果说明: - - Id: 作业ID - Name: 作业名称 - CreateTime: 作业创建时间 - PauseTime: 最近一次作业暂停时间 - EndTime: 作业结束时间 - DbName: 对应数据库名称 - TableName: 对应表名称 - State: 作业运行状态 - DataSourceType: 数据源类型:KAFKA - CurrentTaskNum: 当前子任务数量 - JobProperties: 作业配置详情 -DataSourceProperties: 数据源配置详情 - CustomProperties: 自定义配置 - Statistic: 作业运行状态统计信息 - Progress: 作业运行进度 - Lag: 作业延迟状态 -ReasonOfStateChanged: 作业状态变更的原因 - ErrorLogUrls: 被过滤的质量不合格的数据的查看地址 - OtherMsg: 其他错误信息 - - * State - - 有以下4种State: - - * NEED_SCHEDULE:作业等待被调度 - * RUNNING:作业运行中 - * PAUSED:作业被暂停 - * STOPPED:作业已结束 - * CANCELLED:作业已取消 - - * Progress - - 对于Kafka数据源,显示每个分区当前已消费的offset。如 {"0":"2"} 表示Kafka分区0的消费进度为2。 - - * Lag - - 对于Kafka数据源,显示每个分区的消费延迟。如{"0":10} 表示Kafka分区0的消费延迟为10。 - -## example - -1. 展示名称为 test1 的所有例行导入作业(包括已停止或取消的作业)。结果为一行或多行。 - - SHOW ALL ROUTINE LOAD FOR test1; - -2. 展示名称为 test1 的当前正在运行的例行导入作业 - - SHOW ROUTINE LOAD FOR test1; - -3. 显示 example_db 下,所有的例行导入作业(包括已停止或取消的作业)。结果为一行或多行。 - - use example_db; - SHOW ALL ROUTINE LOAD; - -4. 显示 example_db 下,所有正在运行的例行导入作业 - - use example_db; - SHOW ROUTINE LOAD; - -5. 显示 example_db 下,名称为 test1 的当前正在运行的例行导入作业 - - SHOW ROUTINE LOAD FOR example_db.test1; - -6. 显示 example_db 下,名称为 test1 的所有例行导入作业(包括已停止或取消的作业)。结果为一行或多行。 - - SHOW ALL ROUTINE LOAD FOR example_db.test1; - -7. 显示 example_db 下,名称匹配 test1 的当前正在运行的例行导入作业 - - use example_db; - SHOW ROUTINE LOAD LIKE "%test1%"; - -## keyword - SHOW,ROUTINE,LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md deleted file mode 100644 index c1075da902..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SNAPSHOT.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -{ - "title": "SHOW SNAPSHOT", - "language": "zh-CN" -} ---- - - - -# SHOW SNAPSHOT -## description - 该语句用于查看仓库中已存在的备份。 - 语法: - SHOW SNAPSHOT ON `repo_name` - [WHERE SNAPSHOT = "snapshot" [AND TIMESTAMP = "backup_timestamp"]]; - - 说明: - 1. 各列含义如下: - Snapshot: 备份的名称 - Timestamp: 对应备份的时间版本 - Status: 如果备份正常,则显示 OK,否则显示错误信息 - - 2. 如果指定了 TIMESTAMP,则会额外显示如下信息: - Database: 备份数据原属的数据库名称 - Details: 以 Json 的形式,展示整个备份的数据目录及文件结构 - -## example - 1. 查看仓库 example_repo 中已有的备份: - SHOW SNAPSHOT ON example_repo; - - 2. 仅查看仓库 example_repo 中名称为 backup1 的备份: - SHOW SNAPSHOT ON example_repo WHERE SNAPSHOT = "backup1"; - - 2. 
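SHOW ROUTINE LOAD and SHOW ROUTINE LOAD TASK (both described above) are complementary: the former lists jobs, the latter drills into one job's sub-tasks. A combined usage sketch for the job named test1:

```
SHOW ROUTINE LOAD FOR test1;
SHOW ROUTINE LOAD TASK WHERE JobName = "test1";
```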
查看仓库 example_repo 中名称为 backup1 的备份,时间版本为 "2018-05-05-15-34-26" 的详细信息: - SHOW SNAPSHOT ON example_repo - WHERE SNAPSHOT = "backup1" AND TIMESTAMP = "2018-05-05-15-34-26"; - -## keyword - SHOW, SNAPSHOT - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md deleted file mode 100644 index 53f077dca6..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW STREAM LOAD.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -{ - "title": "SHOW STREAM LOAD", - "language": "zh-CN" -} ---- - - - -# SHOW STREAM LOAD -## description - 该语句用于展示指定的Stream Load任务的执行情况 - 语法: - SHOW STREAM LOAD - [FROM db_name] - [ - WHERE - [LABEL [ = "your_label" | LIKE "label_matcher"]] - [STATUS = ["SUCCESS"|"FAIL"]] - ] - [ORDER BY ...] - [LIMIT limit][OFFSET offset]; - - 说明: - 1) 如果不指定 db_name,使用当前默认db - 2) 如果使用 LABEL LIKE,则会匹配Stream Load任务的 label 包含 label_matcher 的任务 - 3) 如果使用 LABEL = ,则精确匹配指定的 label - 4) 如果指定了 STATUS,则匹配 STREAM LOAD 状态 - 5) 可以使用 ORDER BY 对任意列组合进行排序 - 6) 如果指定了 LIMIT,则显示 limit 条匹配记录。否则全部显示 - 7) 如果指定了 OFFSET,则从偏移量offset开始显示查询结果。默认情况下偏移量为0。 - -## example - 1. 展示默认 db 的所有Stream Load任务 - SHOW STREAM LOAD; - - 2. 展示指定 db 的Stream Load任务,label 中包含字符串 "2014_01_02",展示最老的10个 - SHOW STREAM LOAD FROM example_db WHERE LABEL LIKE "2014_01_02" LIMIT 10; - - 3. 展示指定 db 的Stream Load任务,指定 label 为 "load_example_db_20140102" - SHOW STREAM LOAD FROM example_db WHERE LABEL = "load_example_db_20140102"; - - 4. 展示指定 db 的Stream Load任务,指定 status 为 "success", 并按 StartTime 降序排序 - SHOW STREAM LOAD FROM example_db WHERE STATUS = "success" ORDER BY StartTime DESC; - - 5. 展示指定 db 的导入任务 并按 StartTime 降序排序,并从偏移量5开始显示10条查询结果 - SHOW STREAM LOAD FROM example_db ORDER BY StartTime DESC limit 5,10; - SHOW STREAM LOAD FROM example_db ORDER BY StartTime DESC limit 10 offset 5; - -## keyword - SHOW,STREAM LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md deleted file mode 100644 index 8fbde6130e..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW SYNC JOB.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -{ - "title": "SHOW SYNC JOB", - "language": "zh-CN" -} ---- - - - -# SHOW SYNC JOB - -## description - -此命令用于当前显示所有数据库内的常驻数据同步作业状态。 - -语法: - - SHOW SYNC JOB [FROM db_name] - -## example - -1. 展示当前数据库的所有数据同步作业状态。 - - SHOW SYNC JOB; - -2. 展示数据库 `test_db` 下的所有数据同步作业状态。 - - SHOW SYNC JOB FROM `test_db`; - -## keyword - - SHOW,SYNC,JOB,BINLOG \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md deleted file mode 100644 index 9ea1dfdcbf..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLE CREATION.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -{ - "title": "SHOW TABLE CREATION", - "language": "zh-CN" -} ---- - - - -# SHOW TABLE CREATION - -## Description - - 该语句用于展示指定的 Iceberg Database 建表任务的执行情况 - 语法: - SHOW TABLE CREATION [FROM db_name] [LIKE table_name_wild]; - - 说明: - 1. 使用说明 - 1) 如果不指定 db_name,使用当前默认 db - 2) 如果使用 LIKE,则会匹配表名中包含 table_name_wild 的建表任务 - 2. 各列含义说明 - 1) Database: 数据库名称 - 2) Table:要创建表的名称 - 3) Status:表的创建状态,`success`/`fail` - 4) CreateTime:执行创建该表任务的时间 - 5) Error Msg:创建表失败的错误信息,如果成功,则为空。 -## example - - 1. 
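The sync jobs listed by SHOW SYNC JOB above are resident jobs; a job that is not yet stopped can be ended with STOP SYNC JOB, documented later in this section. A sketch reusing the names from the examples:

```
SHOW SYNC JOB FROM `test_db`;
STOP SYNC JOB `job_name`;   -- job_name is a placeholder, as in the STOP SYNC JOB examples
```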
展示默认 Iceberg db 中所有的建表任务 - SHOW TABLE CREATION; - - mysql> show table creation ; - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - | Database | Table | Status | Create Time | Error Msg | - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - | default_cluster:iceberg_db | logs_1 | success | 2022-01-24 19:42:45 | | - | default_cluster:iceberg_db | logs | fail | 2022-01-24 19:42:45 | Cannot convert Iceberg type[list] to Doris type. | - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - - 2. 展示指定 Iceberg db 中的建表任务 - SHOW TABLE CREATION FROM example_db; - - mysql> show table creation from iceberg_db; - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - | Database | Table | Status | Create Time | Error Msg | - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - | default_cluster:iceberg_db | logs_1 | success | 2022-01-24 19:42:45 | | - | default_cluster:iceberg_db | logs | fail | 2022-01-24 19:42:45 | Cannot convert Iceberg type[list] to Doris type. | - +----------------------------+--------+---------+---------------------+----------------------------------------------------------+ - - 3. 展示指定 Iceberg db 中的建表任务,表名中包含字符串 "log" 的任务 - SHOW TABLE CREATION FROM example_db LIKE '%log%'; - - mysql> show table creation from iceberg_db like "%1"; - +----------------------------+--------+---------+---------------------+-----------+ - | Database | Table | Status | Create Time | Error Msg | - +----------------------------+--------+---------+---------------------+-----------+ - | default_cluster:iceberg_db | logs_1 | success | 2022-01-24 19:42:45 | | - +----------------------------+--------+---------+---------------------+-----------+ - -## keyword - - SHOW,TABLE CREATION - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md deleted file mode 100644 index 35ca22a0cf..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLE ID.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -{ - "title": "SHOW TABLE", - "language": "zh-CN" -} ---- - - - -# SHOW TABLE ID -## description - 该语句用于根据 table id 查找对应的 database name, table name(仅管理员使用) - 语法: - SHOW TABLE [table_id] - -## example - 1. 
根据 table id 查找对应的 database name, table name - SHOW TABLE 10001; - -## keyword - SHOW,TABLE - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md deleted file mode 100644 index 44e396b9df..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLES.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -{ - "title": "SHOW TABLES", - "language": "zh-CN" -} ---- - - - -# SHOW TABLES -## description - 该语句用于展示当前 db 下所有的 table - 语法: - SHOW TABLES; - -## keyword - SHOW,TABLES - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md deleted file mode 100644 index d2de94f73a..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLET.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -{ - "title": "SHOW TABLET", - "language": "zh-CN" -} ---- - - - -# SHOW TABLET -## description - 该语句用于显示指定tablet id 信息(仅管理员使用) - 语法: - SHOW TABLET tablet_id - - -## example - // 显示指定 tablet id 为 10000 的 tablet 的父层级 id 信息 - SHOW TABLET 10000; - -## keyword - SHOW,TABLET,LIMIT - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md deleted file mode 100644 index 113a69ea7d..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TABLETS.md +++ /dev/null @@ -1,65 +0,0 @@ ---- -{ - "title": "SHOW TABLETS", - "language": "zh-CN" -} ---- - - - -# SHOW TABLETS -## description - 该语句用于显示 tablet 相关的信息(仅管理员使用) - 语法: - SHOW TABLETS - [FROM [db_name.]table_name] [partiton(partition_name_1, partition_name_1)] - [where [version=1] [and backendid=10000] [and state="NORMAL|ROLLUP|CLONE|DECOMMISSION"]] - [order by order_column] - [limit [offset,]size] - - 现在show tablets命令支持按照按照以下字段进行过滤:partition, index name, version, backendid, - state,同时支持按照任意字段进行排序,并且提供limit限制返回条数。 - -## example - // 显示指定 db 的下指定表所有 tablet 信息 - SHOW TABLETS FROM example_db.table_name; - - // 获取partition p1和p2的tablet信息 - SHOW TABLETS FROM example_db.table_name partition(p1, p2); - - // 获取10个结果 - SHOW TABLETS FROM example_db.table_name limit 10; - - // 从偏移5开始获取10个结果 - SHOW TABLETS FROM example_db.table_name limit 5,10; - - // 按照backendid/version/state字段进行过滤 - SHOW TABLETS FROM example_db.table_name where backendid=10000 and version=1 and state="NORMAL"; - - // 按照version字段进行排序 - SHOW TABLETS FROM example_db.table_name where backendid=10000 order by version; - - // 获取index名字为t1_rollup的tablet相关信息 - SHOW TABLETS FROM example_db.table_name where indexname="t1_rollup"; - - -## keyword - SHOW,TABLETS,LIMIT - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md deleted file mode 100644 index c3a32fac78..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW TRANSACTION.md +++ /dev/null @@ -1,101 +0,0 @@ ---- -{ - "title": "SHOW TRANSACTION", - "language": "zh-CN" -} ---- - - - -# SHOW TRANSACTION -## description - -该语法用于查看指定 transaction id 、指定label 或指定事务状态的事务详情。 - -语法: - -``` -SHOW TRANSACTION -[FROM db_name] -WHERE -[id = transaction_id] -[label = label_name] -[status = transaction_status]; -``` - -返回结果示例: - -``` - TransactionId: 4005 - Label: insert_8d807d5d-bcdd-46eb-be6d-3fa87aa4952d - Coordinator: FE: 10.74.167.16 - TransactionStatus: VISIBLE - LoadJobSourceType: 
INSERT_STREAMING - PrepareTime: 2020-01-09 14:59:07 - PreCommitTime: 2020-01-09 14:59:07 - CommitTime: 2020-01-09 14:59:09 - FinishTime: 2020-01-09 14:59:09 - Reason: -ErrorReplicasCount: 0 - ListenerId: -1 - TimeoutMs: 300000 -``` - -* TransactionId:事务id -* Label:导入任务对应的 label -* Coordinator:负责事务协调的节点 -* TransactionStatus:事务状态 - * PREPARE:准备阶段 - * PRECOMMITTED:预提交阶段 - * COMMITTED:事务成功,但数据不可见 - * VISIBLE:事务成功且数据可见 - * ABORTED:事务失败 -* LoadJobSourceType:导入任务的类型。 -* PrepareTime:事务开始时间 -* PreCommitTime:事务预提交时间 -* CommitTime:事务提交成功的时间 -* FinishTime:数据可见的时间 -* Reason:错误信息 -* ErrorReplicasCount:有错误的副本数 -* ListenerId:相关的导入作业的id -* TimeoutMs:事务超时时间,单位毫秒 - -## example - -1. 查看 id 为 4005 的事务: - - SHOW TRANSACTION WHERE ID=4005; - -2. 指定 db 中,查看 id 为 4005 的事务: - - SHOW TRANSACTION FROM db WHERE ID=4005; - -3. 查看 label 为 label_name的事务: - - SHOW TRANSACTION WHERE LABEL = 'label_name'; - -4. 查看 status 为 visible 的事务: - - SHOW TRANSACTION WHERE STATUS = 'visible'; - -## keyword - - SHOW, TRANSACTION - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md deleted file mode 100644 index 1a7f9f3abe..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SHOW-LAST-INSERT.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -{ - "title": "SHOW LAST INSERT", - "language": "zh-CN" -} ---- - - - -# SHOW LAST INSERT -## description - -该语法用于查看在当前session连接中,最近一次 insert 操作的结果 - -语法: - -``` -SHOW LAST INSERT -``` - -返回结果示例: - -``` - TransactionId: 64067 - Label: insert_ba8f33aea9544866-8ed77e2844d0cc9b - Database: default_cluster:db1 - Table: t1 -TransactionStatus: VISIBLE - LoadedRows: 2 - FilteredRows: 0 -``` - -* TransactionId:事务id -* Label:insert任务对应的 label -* Database:insert对应的数据库 -* Table:insert对应的表 -* TransactionStatus:事务状态 - * PREPARE:准备阶段 - * PRECOMMITTED:预提交阶段 - * COMMITTED:事务成功,但数据不可见 - * VISIBLE:事务成功且数据可见 - * ABORTED:事务失败 -* LoadedRows:导入的行数 -* FilteredRows:被过滤的行数 - -## example - -## keyword - - SHOW, LAST, INSERT - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SPARK LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SPARK LOAD.md deleted file mode 100644 index 9f8ab727de..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/SPARK LOAD.md +++ /dev/null @@ -1,265 +0,0 @@ ---- -{ - "title": "SPARK LOAD", - "language": "zh-CN" -} ---- - - - -# SPARK LOAD -## description - - Spark load 通过外部的 Spark 资源实现对导入数据的预处理,提高 Doris 大数据量的导入性能并且节省 Doris 集群的计算资源。主要用于初次迁移,大数据量导入 Doris 的场景。 - - Spark load 是一种异步导入方式,用户需要通过 MySQL 协议创建 Spark 类型导入任务,并通过 `SHOW LOAD` 查看导入结果。 - -语法: - - LOAD LABEL load_label - ( - data_desc1[, data_desc2, ...] - ) - WITH RESOURCE resource_name - [resource_properties] - [opt_properties]; - - 1. load_label - - 当前导入批次的标签。在一个 database 内唯一。 - 语法: - [database_name.]your_label - - 2. data_desc - - 用于描述一批导入数据。 - 语法: - DATA INFILE - ( - "file_path1"[, file_path2, ...] 
- ) - [NEGATIVE] - INTO TABLE `table_name` - [PARTITION (p1, p2)] - [COLUMNS TERMINATED BY "column_separator"] - [FORMAT AS "file_type"] - [(column_list)] - [COLUMNS FROM PATH AS (col2, ...)] - [SET (k1 = func(k2))] - [WHERE predicate] - - DATA FROM TABLE hive_external_tbl - [NEGATIVE] - INTO TABLE tbl_name - [PARTITION (p1, p2)] - [SET (k1=f1(xx), k2=f2(xx))] - [WHERE predicate] - - 说明: - file_path: - - 文件路径,可以指定到一个文件,也可以用 * 通配符指定某个目录下的所有文件。通配符必须匹配到文件,而不能是目录。 - - hive_external_tbl: - - hive 外部表名。 - 要求导入的 doris 表中的列必须在 hive 外部表中存在。 - 每个导入任务只支持从一个 hive 外部表导入。 - 不能与 file_path 方式同时使用。 - - PARTITION: - - 如果指定此参数,则只会导入指定的分区,导入分区以外的数据会被过滤掉。 - 如果不指定,默认导入table的所有分区。 - - NEGATIVE: - - 如果指定此参数,则相当于导入一批“负”数据。用于抵消之前导入的同一批数据。 - 该参数仅适用于存在 value 列,并且 value 列的聚合类型仅为 SUM 的情况。 - - column_separator: - - 用于指定导入文件中的列分隔符。默认为 \t - 如果是不可见字符,则需要加\\x作为前缀,使用十六进制来表示分隔符。 - 如hive文件的分隔符\x01,指定为"\\x01" - - file_type: - - 用于指定导入文件的类型,目前仅支持csv。 - - column_list: - - 用于指定导入文件中的列和 table 中的列的对应关系。 - 当需要跳过导入文件中的某一列时,将该列指定为 table 中不存在的列名即可。 - 语法: - (col_name1, col_name2, ...) - - SET: - - 如果指定此参数,可以将源文件某一列按照函数进行转化,然后将转化后的结果导入到table中。语法为 `column_name` = expression。 - 仅支持Spark SQL built-in functions,具体可参考 https://spark.apache.org/docs/2.4.6/api/sql/index.html。 - 举几个例子帮助理解。 - 例1: 表中有3个列“c1, c2, c3", 源文件中前两列依次对应(c1,c2),后两列之和对应c3;那么需要指定 columns (c1,c2,tmp_c3,tmp_c4) SET (c3=tmp_c3+tmp_c4); - 例2: 表中有3个列“year, month, day",源文件中只有一个时间列,为”2018-06-01 01:02:03“格式。 - 那么可以指定 columns(tmp_time) set (year = year(tmp_time), month=month(tmp_time), day=day(tmp_time)) 完成导入。 - - WHERE: - - 对做完 transform 的数据进行过滤,符合 where 条件的数据才能被导入。WHERE 语句中只可引用表中列名。 - 3. resource_name - - 所使用的 spark 资源名称,可以通过 `SHOW RESOURCES` 命令查看。 - - 4. resource_properties - - 当用户有临时性的需求,比如增加任务使用的资源而修改 Spark configs,可以在这里设置,设置仅对本次任务生效,并不影响 Doris 集群中已有的配置。 - 另外不同的 broker,以及不同的访问方式,需要提供的信息不同。可以查看 broker 使用文档。 - - 4. opt_properties - - 用于指定一些特殊参数。 - 语法: - [PROPERTIES ("key"="value", ...)] - - 可以指定如下参数: - timeout: 指定导入操作的超时时间。默认超时为4小时。单位秒。 - max_filter_ratio:最大容忍可过滤(数据不规范等原因)的数据比例。默认零容忍。 - strict mode: 是否对数据进行严格限制。默认为 false。 - timezone: 指定某些受时区影响的函数的时区,如 strftime/alignment_timestamp/from_unixtime 等等,具体请查阅 [时区] 文档。如果不指定,则使用 "Asia/Shanghai" 时区。 - - 5. 导入数据格式样例 - - 整型类(TINYINT/SMALLINT/INT/BIGINT/LARGEINT):1, 1000, 1234 - 浮点类(FLOAT/DOUBLE/DECIMAL):1.1, 0.23, .356 - 日期类(DATE/DATETIME):2017-10-03, 2017-06-13 12:34:03。 - (注:如果是其他日期格式,可以在导入命令中,使用 strftime 或者 time_format 函数进行转换) - 字符串类(CHAR/VARCHAR):"I am a student", "a" - NULL值:\N - -## example - - 1. 从 HDFS 导入一批数据,指定超时时间和过滤比例。使用名为 my_spark 的 spark 资源。 - - LOAD LABEL example_db.label1 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - ) - WITH RESOURCE 'my_spark' - PROPERTIES - ( - "timeout" = "3600", - "max_filter_ratio" = "0.1" - ); - - 其中 hdfs_host 为 namenode 的 host,hdfs_port 为 fs.defaultFS 端口(默认9000) - - 2. 从 HDFS 导入一批"负"数据,指定分隔符为逗号,使用通配符*指定目录下的所有文件,并指定 spark 资源的临时参数。 - - LOAD LABEL example_db.label3 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/*") - NEGATIVE - INTO TABLE `my_table` - COLUMNS TERMINATED BY "," - ) - WITH RESOURCE 'my_spark' - ( - "spark.executor.memory" = "3g", - "broker.username" = "hdfs_user", - "broker.password" = "hdfs_passwd" - ); - - 3. 
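Because Spark Load is asynchronous, the description above notes that results are checked with SHOW LOAD. A minimal sketch for the job created in example 1, using its label:

```
SHOW LOAD FROM example_db WHERE LABEL = "label1";
```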
从 HDFS 导入一批数据,指定分区, 并对导入文件的列做一些转化,如下: - 表结构为: - k1 varchar(20) - k2 int - - 假设数据文件只有一行数据: - - Adele,1,1 - - 数据文件中各列,对应导入语句中指定的各列: - k1,tmp_k2,tmp_k3 - - 转换如下: - - 1) k1: 不变换 - 2) k2:是 tmp_k2 和 tmp_k3 数据之和 - - LOAD LABEL example_db.label6 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - PARTITION (p1, p2) - COLUMNS TERMINATED BY "," - (k1, tmp_k2, tmp_k3) - SET ( - k2 = tmp_k2 + tmp_k3 - ) - ) - WITH RESOURCE 'my_spark'; - - 4. 提取文件路径中的分区字段 - - 如果需要,则会根据表中定义的字段类型解析文件路径中的分区字段(partitioned fields),类似Spark中Partition Discovery的功能 - - LOAD LABEL example_db.label10 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/*/*") - INTO TABLE `my_table` - (k1, k2, k3) - COLUMNS FROM PATH AS (city, utc_date) - SET (uniq_id = md5sum(k1, city)) - ) - WITH RESOURCE 'my_spark'; - - hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing目录下包括如下文件: - - [hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/utc_date=2019-06-26/0000.csv, hdfs://hdfs_host:hdfs_port/user/palo/data/input/dir/city=beijing/utc_date=2019-06-26/0001.csv, ...] - - 则提取文件路径的中的city和utc_date字段 - - 5. 对待导入数据进行过滤,k1 值大于 10 的列才能被导入。 - - LOAD LABEL example_db.label10 - ( - DATA INFILE("hdfs://hdfs_host:hdfs_port/user/palo/data/input/file") - INTO TABLE `my_table` - WHERE k1 > 10 - ) - WITH RESOURCE 'my_spark'; - - 6. 从 hive 外部表导入,并将源表中的 uuid 列通过全局字典转化为 bitmap 类型。 - - LOAD LABEL db1.label1 - ( - DATA FROM TABLE hive_t1 - INTO TABLE tbl1 - SET - ( - uuid=bitmap_dict(uuid) - ) - ) - WITH RESOURCE 'my_spark'; - -## keyword - - SPARK,LOAD diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md deleted file mode 100644 index 12cdd58824..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP ROUTINE LOAD.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -{ - "title": "STOP ROUTINE LOAD", - "language": "zh-CN" -} ---- - - - -# STOP ROUTINE LOAD -## example - -1. 停止名称为 test1 的例行导入作业。 - - STOP ROUTINE LOAD FOR test1; - -## keyword - STOP,ROUTINE,LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md deleted file mode 100644 index cf9cbca00f..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STOP SYNC JOB.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -{ - "title": "STOP SYNC JOB", - "language": "zh-CN" -} ---- - - - -# STOP SYNC JOB - -## description - -通过 `job_name` 停止一个数据库内非停止状态的常驻数据同步作业。 - -语法: - - STOP SYNC JOB [db.]job_name - -## example - -1. 停止名称为 `job_name` 的数据同步作业 - - STOP SYNC JOB `job_name`; - -## keyword - STOP,SYNC,JOB,BINLOG \ No newline at end of file diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md deleted file mode 100644 index 0c167d0f62..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md +++ /dev/null @@ -1,226 +0,0 @@ ---- -{ - "title": "STREAM LOAD", - "language": "zh-CN" -} ---- - - - -# STREAM LOAD -## description - NAME: - stream-load: load data to table in streaming - - SYNOPSIS - curl --location-trusted -u user:passwd [-H ""...] 
-T data.file -XPUT http://fe_host:http_port/api/{db}/{table}/_stream_load - - DESCRIPTION - 该语句用于向指定的 table 导入数据,与普通Load区别是,这种导入方式是同步导入。 - 这种导入方式仍然能够保证一批导入任务的原子性,要么全部数据导入成功,要么全部失败。 - 该操作会同时更新和此 base table 相关的 rollup table 的数据。 - 这是一个同步操作,整个数据导入工作完成后返回给用户导入结果。 - 当前支持HTTP chunked与非chunked上传两种方式,对于非chunked方式,必须要有Content-Length来标示上传内容长度,这样能够保证数据的完整性。 - 另外,用户最好设置Expect Header字段内容100-continue,这样可以在某些出错场景下避免不必要的数据传输。 - - OPTIONS - 用户可以通过HTTP的Header部分来传入导入参数 - - label: 一次导入的标签,相同标签的数据无法多次导入。用户可以通过指定Label的方式来避免一份数据重复导入的问题。 - 当前Palo内部保留30分钟内最近成功的label。 - - column_separator:用于指定导入文件中的列分隔符,默认为\t。如果是不可见字符,则需要加\x作为前缀,使用十六进制来表示分隔符。 - 如hive文件的分隔符\x01,需要指定为-H "column_separator:\x01"。 - 可以使用多个字符的组合作为列分隔符。 - - line_delimiter:用于指定导入文件中的换行符,默认为\n。 - 可以使用做多个字符的组合作为换行符。 - - columns:用于指定导入文件中的列和 table 中的列的对应关系。如果源文件中的列正好对应表中的内容,那么是不需要指定这个字段的内容的。 - 如果源文件与表schema不对应,那么需要这个字段进行一些数据转换。这里有两种形式column,一种是直接对应导入文件中的字段,直接使用字段名表示; - 一种是衍生列,语法为 `column_name` = expression。举几个例子帮助理解。 - 例1: 表中有3个列“c1, c2, c3”,源文件中的三个列一次对应的是"c3,c2,c1"; 那么需要指定-H "columns: c3, c2, c1" - 例2: 表中有3个列“c1, c2, c3", 源文件中前三列依次对应,但是有多余1列;那么需要指定-H "columns: c1, c2, c3, xxx"; - 最后一个列随意指定个名称占位即可 - 例3: 表中有3个列“year, month, day"三个列,源文件中只有一个时间列,为”2018-06-01 01:02:03“格式; - 那么可以指定-H "columns: col, year = year(col), month=month(col), day=day(col)"完成导入 - - where: 用于抽取部分数据。用户如果有需要将不需要的数据过滤掉,那么可以通过设定这个选项来达到。 - 例1: 只导入大于k1列等于20180601的数据,那么可以在导入时候指定-H "where: k1 = 20180601" - - max_filter_ratio:最大容忍可过滤(数据不规范等原因)的数据比例。默认零容忍。数据不规范不包括通过 where 条件过滤掉的行。 - - partitions: 用于指定这次导入所设计的partition。如果用户能够确定数据对应的partition,推荐指定该项。不满足这些分区的数据将被过滤掉。 - 比如指定导入到p1, p2分区,-H "partitions: p1, p2" - - timeout: 指定导入的超时时间。单位秒。默认是 600 秒。可设置范围为 1 秒 ~ 259200 秒。 - - strict_mode: 用户指定此次导入是否开启严格模式,默认为关闭。开启方式为 -H "strict_mode: true"。 - - timezone: 指定本次导入所使用的时区。默认为东八区。该参数会影响所有导入涉及的和时区有关的函数结果。 - - exec_mem_limit: 导入内存限制。默认为 2GB。单位为字节。 - - format: 指定导入数据格式,默认是csv,支持json格式和带header的csv格式: csv_with_names、csv_with_names_and_types。 - - jsonpaths: 导入json方式分为:简单模式和匹配模式。 - 简单模式:没有设置jsonpaths参数即为简单模式,这种模式下要求json数据是对象类型,例如: - {"k1":1, "k2":2, "k3":"hello"},其中k1,k2,k3是列名字。 - - 匹配模式:用于json数据相对复杂,需要通过jsonpaths参数匹配对应的value。 - - strip_outer_array: 布尔类型,为true表示json数据以数组对象开始且将数组对象中进行展平,默认值是false。例如: - [ - {"k1" : 1, "v1" : 2}, - {"k1" : 3, "v1" : 4} - ] - 当strip_outer_array为true,最后导入到doris中会生成两行数据。 - - json_root: json_root为合法的jsonpath字符串,用于指定json document的根节点,默认值为""。 - - merge_type: 数据的合并类型,一共支持三种类型APPEND、DELETE、MERGE 其中,APPEND是默认值,表示这批数据全部需要追加到现有数据中,DELETE 表示删除与这批数据key相同的所有行,MERGE 语义 需要与delete 条件联合使用,表示满足delete 条件的数据按照DELETE 语义处理其余的按照APPEND 语义处理, 示例:`-H "merge_type: MERGE" -H "delete: flag=1"` - delete: 仅在 MERGE下有意义, 表示数据的删除条件 - - function_column.sequence_col: 只适用于UNIQUE_KEYS,相同key列下,保证value列按照source_sequence列进行REPLACE, - source_sequence可以是数据源中的列,也可以是表结构中的一列。 - - fuzzy_parse: 布尔类型,为true表示json将以第一行为schema 进行解析,开启这个选项可以提高json 导入效率,但是要求所有json 对象的key的顺序和第一行一致, 默认为false,仅用于json 格式 - - num_as_string: 布尔类型,为true表示在解析json数据时会将数字类型转为字符串,然后在确保不会出现精度丢失的情况下进行导入。 - - read_json_by_line: 布尔类型,为true表示支持每行读取一个json对象,默认值为false。 - - send_batch_parallelism: 整型,用于设置发送批处理数据的并行度,如果并行度的值超过 BE 配置中的 `max_send_batch_parallelism_per_job`,那么作为协调点的 BE 将使用 `max_send_batch_parallelism_per_job` 的值。 - - load_to_single_tablet: 布尔类型,为true表示支持一个任务只导入数据到对应分区的一个tablet,默认值为false,该参数只允许在对带有random分区的olap表导数的时候设置。 - - RETURN VALUES - 导入完成后,会以Json格式返回这次导入的相关内容。当前包括以下字段 - Status: 导入最后的状态。 - Success:表示导入成功,数据已经可见; - Publish Timeout:表述导入作业已经成功Commit,但是由于某种原因并不能立即可见。用户可以视作已经成功不必重试导入 - Label Already Exists: 表明该Label已经被其他作业占用,可能是导入成功,也可能是正在导入。 - 用户需要通过get label 
state命令来确定后续的操作 - 其他:此次导入失败,用户可以指定Label重试此次作业 - Message: 导入状态详细的说明。失败时会返回具体的失败原因。 - NumberTotalRows: 从数据流中读取到的总行数 - NumberLoadedRows: 此次导入的数据行数,只有在Success时有效 - NumberFilteredRows: 此次导入过滤掉的行数,即数据质量不合格的行数 - NumberUnselectedRows: 此次导入,通过 where 条件被过滤掉的行数 - LoadBytes: 此次导入的源文件数据量大小 - LoadTimeMs: 此次导入所用的时间 - BeginTxnTimeMs: 向Fe请求开始一个事务所花费的时间,单位毫秒。 - StreamLoadPutTimeMs: 向Fe请求获取导入数据执行计划所花费的时间,单位毫秒。 - ReadDataTimeMs: 读取数据所花费的时间,单位毫秒。 - WriteDataTimeMs: 执行写入数据操作所花费的时间,单位毫秒。 - CommitAndPublishTimeMs: 向Fe请求提交并且发布事务所花费的时间,单位毫秒。 - ErrorURL: 被过滤数据的具体内容,仅保留前1000条 - - ERRORS - 可以通过以下语句查看导入错误详细信息: - - SHOW LOAD WARNINGS ON 'url' - - 其中 url 为 ErrorURL 给出的 url。 - -## example - - 1. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表,使用Label用于去重。指定超时时间为 100 秒 - curl --location-trusted -u root -H "label:123" -H "timeout:100" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 2. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表,使用Label用于去重, 并且只导入k1等于20180601的数据 - curl --location-trusted -u root -H "label:123" -H "where: k1=20180601" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 3. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表, 允许20%的错误率(用户是defalut_cluster中的) - curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 4. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表, 允许20%的错误率,并且指定文件的列名(用户是defalut_cluster中的) - curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -H "columns: k2, k1, v1" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 5. 将本地文件'testData'中的数据导入到数据库'testDb'中'testTbl'的表中的p1, p2分区, 允许20%的错误率。 - curl --location-trusted -u root -H "label:123" -H "max_filter_ratio:0.2" -H "partitions: p1, p2" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 6. 使用streaming方式导入(用户是defalut_cluster中的) - seq 1 10 | awk '{OFS="\t"}{print $1, $1 * 10}' | curl --location-trusted -u root -T - http://host:port/api/testDb/testTbl/_stream_load - - 7. 导入含有HLL列的表,可以是表中的列或者数据中的列用于生成HLL列,也可使用hll_empty补充数据中没有的列 - curl --location-trusted -u root -H "columns: k1, k2, v1=hll_hash(k1), v2=hll_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 8. 导入数据进行严格模式过滤,并设置时区为 Africa/Abidjan - curl --location-trusted -u root -H "strict_mode: true" -H "timezone: Africa/Abidjan" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 9. 导入含有BITMAP列的表,可以是表中的列或者数据中的列用于生成BITMAP列,也可以使用bitmap_empty填充空的Bitmap - curl --location-trusted -u root -H "columns: k1, k2, v1=to_bitmap(k1), v2=bitmap_empty()" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 10. 导入含有QUANTILE_STATE列的表,可以是表中的列或者数据中的列用于生成QUANTILE_STATE列,也可以使用to_quantile_state 函数将**数值类型**的原始数据转化为**QUANTILE_STATE**类型, 其中2048是可以选参数 代表 TDigest 算法的精度,有效值为[2048,10000],数值越大精度越高,默认2048 - curl --location-trusted -u root -H "columns: k1, k2, v1, v2, v1=to_quantile_state(v1, 2048)" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 11. 
简单模式,导入json数据 - 表结构: - - `category` varchar(512) NULL COMMENT "", - `author` varchar(512) NULL COMMENT "", - `title` varchar(512) NULL COMMENT "", - `price` double NULL COMMENT "" - json数据格式: - {"category":"C++","author":"avc","title":"C++ primer","price":895} - 导入命令: - curl --location-trusted -u root -H "label:123" -H "format: json" -T testData http://host:port/api/testDb/testTbl/_stream_load - 为了提升吞吐量,支持一次性导入多条json数据,每行为一个json对象,默认使用\n作为换行符,需要将read_json_by_line设置为true,json数据格式如下: - {"category":"C++","author":"avc","title":"C++ primer","price":89.5} - {"category":"Java","author":"avc","title":"Effective Java","price":95} - {"category":"Linux","author":"avc","title":"Linux kernel","price":195} - - 12. 匹配模式,导入json数据 - json数据格式: - [ - {"category":"xuxb111","author":"1avc","title":"SayingsoftheCentury","price":895}, - {"category":"xuxb222","author":"2avc","title":"SayingsoftheCentury","price":895}, - {"category":"xuxb333","author":"3avc","title":"SayingsoftheCentury","price":895} - ] - 通过指定jsonpath进行精准导入,例如只导入category、author、price三个属性 - curl --location-trusted -u root -H "columns: category, price, author" -H "label:123" -H "format: json" -H "jsonpaths: [\"$.category\",\"$.price\",\"$.author\"]" -H "strip_outer_array: true" -T testData http://host:port/api/testDb/testTbl/_stream_load - 说明: - 1)如果json数据是以数组开始,并且数组中每个对象是一条记录,则需要将strip_outer_array设置成true,表示展平数组。 - 2)如果json数据是以数组开始,并且数组中每个对象是一条记录,在设置jsonpath时,我们的ROOT节点实际上是数组中对象。 - - 13. 用户指定json根节点 - json数据格式: - { - "RECORDS":[ - {"category":"11","title":"SayingsoftheCentury","price":895,"timestamp":1589191587}, - {"category":"22","author":"2avc","price":895,"timestamp":1589191487}, - {"category":"33","author":"3avc","title":"SayingsoftheCentury","timestamp":1589191387} - ] - } - 通过指定jsonpath进行精准导入,例如只导入category、author、price三个属性 - curl --location-trusted -u root -H "columns: category, price, author" -H "label:123" -H "format: json" -H "jsonpaths: [\"$.category\",\"$.price\",\"$.author\"]" -H "strip_outer_array: true" -H "json_root: $.RECORDS" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 14. 删除与这批导入key 相同的数据 - curl --location-trusted -u root -H "merge_type: DELETE" -T testData http://host:port/api/testDb/testTbl/_stream_load - 15. 将这批数据中与flag 列为ture 的数据相匹配的列删除,其他行正常追加 - curl --location-trusted -u root: -H "column_separator:," -H "columns: siteid, citycode, username, pv, flag" -H "merge_type: MERGE" -H "delete: flag=1" -T testData http://host:port/api/testDb/testTbl/_stream_load - - 16. 导入数据到含有sequence列的UNIQUE_KEYS表中 - curl --location-trusted -u root -H "columns: k1,k2,source_sequence,v1,v2" -H "function_column.sequence_col: source_sequence" -T testData http://host:port/api/testDb/testTbl/_stream_load - -## keyword - STREAM,LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/UPDATE.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/UPDATE.md deleted file mode 100644 index 861cd4baf7..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/UPDATE.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -{ - "title": "UPDATE", - "language": "zh-CN" -} ---- - - - -# UPDATE -## description -### Syntax - -``` -UPDATE table_name - SET assignment_list - WHERE expression - -value: - {expr | DEFAULT} - -assignment: - col_name = value - -assignment_list: - assignment [, assignment] ... 
-``` - -### Parameters - -+ table_name: 待更新数据的目标表。可以是 'db_name.table_name' 形式 -+ assignment_list: 待更新的目标列,形如 'col_name = value, col_name = value' 格式 -+ where expression: 期望更新的条件,一个返回 true 或者 false 的表达式即可 - -### Note - -当前 UPDATE 语句仅支持在 Unique 模型上的行更新,存在并发更新导致的数据冲突可能。 -目前 Doris 并不处理这类问题,需要用户从业务侧规避这类问题。 - -## example - -`test` 表是一个 unique 模型的表,包含: k1, k2, v1, v2 四个列。其中 k1, k2 是 key,v1, v2 是value,聚合方式是 Replace。 - -1. 将 'test' 表中满足条件 k1 =1 , k2 =2 的 v1 列更新为 1 - -``` -UPDATE test SET v1 = 1 WHERE k1=1 and k2=2; -``` - -2. 将 'test' 表中 k1=1 的列的 v1 列自增1 - -``` -UPDATE test SET v1 = v1+1 WHERE k1=1; -``` - -## keyword - - UPDATE diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md deleted file mode 100644 index 52544a59f0..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/alter-routine-load.md +++ /dev/null @@ -1,119 +0,0 @@ ---- -{ - "title": "ALTER ROUTINE LOAD", - "language": "zh-CN" -} ---- - - - -# ALTER ROUTINE LOAD -## description - -该语法用于修改已经创建的例行导入作业。 - -只能修改处于 PAUSED 状态的作业。 - -语法: - - ALTER ROUTINE LOAD FOR [db.]job_name - [job_properties] - FROM data_source - [data_source_properties] - -1. `[db.]job_name` - - 指定要修改的作业名称。 - -2. `tbl_name` - - 指定需要导入的表的名称。 - -3. `job_properties` - - 指定需要修改的作业参数。目前仅支持如下参数的修改: - - 1. `desired_concurrent_number` - 2. `max_error_number` - 3. `max_batch_interval` - 4. `max_batch_rows` - 5. `max_batch_size` - 6. `jsonpaths` - 7. `json_root` - 8. `strip_outer_array` - 9. `strict_mode` - 10. `timezone` - 11. `num_as_string` - 12. `fuzzy_parse` - - -4. `data_source` - - 数据源的类型。当前支持: - - KAFKA - -5. `data_source_properties` - - 数据源的相关属性。目前仅支持: - - 1. `kafka_partitions` - 2. `kafka_offsets` - 3. `kafka_broker_list` - 4. `kafka_topic` - 5. 自定义 property,如 `property.group.id` - - 注: - - 1. `kafka_partitions` 和 `kafka_offsets` 用于修改待消费的 kafka partition 的offset,仅能修改当前已经消费的 partition。不能新增 partition。 - -## example - -1. 将 `desired_concurrent_number` 修改为 1 - - ``` - ALTER ROUTINE LOAD FOR db1.label1 - PROPERTIES - ( - "desired_concurrent_number" = "1" - ); - ``` - -2. 将 `desired_concurrent_number` 修改为 10,修改 partition 的offset,修改 group id。 - - ``` - ALTER ROUTINE LOAD FOR db1.label1 - PROPERTIES - ( - "desired_concurrent_number" = "10" - ) - FROM kafka - ( - "kafka_partitions" = "0, 1, 2", - "kafka_offsets" = "100, 200, 100", - "property.group.id" = "new_group" - ); - ``` - - -## keyword - - ALTER,ROUTINE,LOAD - diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/insert.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/insert.md deleted file mode 100644 index f19e1477b4..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/insert.md +++ /dev/null @@ -1,111 +0,0 @@ ---- -{ - "title": "INSERT", - "language": "zh-CN" -} ---- - - - -# INSERT -## description -### Syntax - -``` -INSERT INTO table_name - [ PARTITION (p1, ...) ] - [ WITH LABEL label] - [ (column [, ...]) ] - [ [ hint [, ...] ] ] - { VALUES ( { expression | DEFAULT } [, ...] ) [, ...] 
| query } -``` - -### Parameters - -> tablet_name: 导入数据的目的表。可以是 `db_name.table_name` 形式 -> -> partitions: 指定待导入的分区,必须是 `table_name` 中存在的分区,多个分区名称用逗号分隔 -> -> label: 为 Insert 任务指定一个 label -> -> column_name: 指定的目的列,必须是 `table_name` 中存在的列 -> -> expression: 需要赋值给某个列的对应表达式 -> -> DEFAULT: 让对应列使用默认值 -> -> query: 一个普通查询,查询的结果会写入到目标中 -> -> hint: 用于指示 `INSERT` 执行行为的一些指示符。`streaming` 和 默认的非 `streaming` 方式均会使用同步方式完成 `INSERT` 语句执行 -> 非 `streaming` 方式在执行完成后会返回一个 label 方便用户通过 `SHOW LOAD` 查询导入的状态 - -### Note - -当前执行 `INSERT` 语句时,对于有不符合目标表格式的数据,默认的行为是过滤,比如字符串超长等。但是对于有要求数据不能够被过滤的业务场景,可以通过设置会话变量 `enable_insert_strict` 为 `true` 来确保当有数据被过滤掉的时候,`INSERT` 不会被执行成功。 - -## example - -`test` 表包含两个列`c1`, `c2`。 - -1. 向`test`表中导入一行数据 - -``` -INSERT INTO test VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, 2); -INSERT INTO test (c1, c2) VALUES (1, DEFAULT); -INSERT INTO test (c1) VALUES (1); -``` - -其中第一条、第二条语句是一样的效果。在不指定目标列时,使用表中的列顺序来作为默认的目标列。 -第三条、第四条语句表达的意思是一样的,使用`c2`列的默认值,来完成数据导入。 - -2. 向`test`表中一次性导入多行数据 - -``` -INSERT INTO test VALUES (1, 2), (3, 2 + 2); -INSERT INTO test (c1, c2) VALUES (1, 2), (3, 2 * 2); -INSERT INTO test (c1) VALUES (1), (3); -INSERT INTO test (c1, c2) VALUES (1, DEFAULT), (3, DEFAULT); -``` - -其中第一条、第二条语句效果一样,向`test`表中一次性导入两条数据 -第三条、第四条语句效果已知,使用`c2`列的默认值向`test`表中导入两条数据 - -3. 向 `test` 表中导入一个查询语句结果 - -``` -INSERT INTO test SELECT * FROM test2; -INSERT INTO test (c1, c2) SELECT * from test2; -``` - -4. 向 `test` 表中导入一个查询语句结果,并指定 partition 和 label - -``` -INSERT INTO test PARTITION(p1, p2) WITH LABEL `label1` SELECT * FROM test2; -INSERT INTO test WITH LABEL `label1` (c1, c2) SELECT * from test2; -``` - -异步的导入其实是,一个同步的导入封装成了异步。填写 streaming 和不填写的**执行效率是一样**的。 - -由于Doris之前的导入方式都是异步导入方式,为了兼容旧有的使用习惯,不加 streaming 的 `INSERT` 语句依旧会返回一个 label,用户需要通过`SHOW LOAD`命令查看此`label`导入作业的状态。 - -## keyword - - INSERT diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md deleted file mode 100644 index be9af8810e..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/lateral-view.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -{ - "title": "Lateral View", - "language": "zh-CN" -} ---- - - - -# Lateral View - -## description - -Lateral view 语法可以搭配 Table Function,完成将一行数据扩展成多行(列转行)的需求。 - -语法: - -``` -... -FROM table_name -lateral_view_ref[ lateral_view_ref ...] - -lateral_view_ref: - -LATERAL VIEW table_function(...) view_alias as col_name -``` - -Lateral view 子句必须跟随在表名或子查询之后。可以包含多个 Lateral view 子句。`view_alias` 是对应 Lateral View 的名称。`col_name` 是表函数 `table_function` 产出的列名。 - -目前支持的表函数: - -1. `explode_split` -2. `explode_bitmap` -3. `explode_json_array` - -具体函数说明可参阅对应语法帮助文档。 - -table 中的数据会和各个 Lateral View 产生的结果集做笛卡尔积后返回上层。 - -## example - -这里只给出 Lateral View 的语法示例,具体含义和产出的结果说明,可参阅对应表函数帮助文档。 - -1. - -``` -select k1, e1 from tbl1 -lateral view explode_split(v1, ',') tmp1 as e1 where e1 = "abc"; -``` - -2. - -``` -select k1, e1, e2 from tbl2 -lateral view explode_split(v1, ',') tmp1 as e1 -lateral view explode_bitmap(bitmap1) tmp2 as e2 -where e2 > 3; -``` - -3. - -``` -select k1, e1, e2 from tbl3 -lateral view explode_json_array_int("[1,2,3]") tmp1 as e1 -lateral view explode_bitmap(bitmap_from_string("4,5,6")) tmp2 as e2; -``` - -4. 
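The INSERT description above mentions the session variable enable_insert_strict, which makes an INSERT fail when any row would be filtered out. A sketch of enabling it for the current session before loading query results, assuming the usual SET syntax for session variables:

```
SET enable_insert_strict = true;   -- session-level switch named in the INSERT description above
INSERT INTO test SELECT * FROM test2;
```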
- -``` -select k1, e1 from (select k1, bitmap_union(members) as x from tbl1 where k1=10000 group by k1)tmp1 -lateral view explode_bitmap(x) tmp2 as e1; -``` - -## keyword - - LATERAL, VIEW diff --git a/docs/zh-CN/sql-reference/sql-statements/Utility/DESCRIBE.md b/docs/zh-CN/sql-reference/sql-statements/Utility/DESCRIBE.md deleted file mode 100644 index 9522025095..0000000000 --- a/docs/zh-CN/sql-reference/sql-statements/Utility/DESCRIBE.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -{ - "title": "DESCRIBE", - "language": "zh-CN" -} ---- - - - -# DESCRIBE -## description - 该语句用于展示指定 table 的 schema 信息 - 语法: - DESC[RIBE] [db_name.]table_name [ALL]; - - 说明: - 如果指定 ALL,则显示该 table 的所有 index(rollup) 的 schema - -## example - -1. 显示Base表Schema - - DESC table_name; - -2. 显示表所有 index 的 schema - - DESC db1.table_name ALL; - -## keyword - - DESCRIBE,DESC diff --git a/new-docs/zh-CN/summary/basic-summary.md b/docs/zh-CN/summary/basic-summary.md similarity index 96% rename from new-docs/zh-CN/summary/basic-summary.md rename to docs/zh-CN/summary/basic-summary.md index e31cd7f692..1c0c152d15 100644 --- a/new-docs/zh-CN/summary/basic-summary.md +++ b/docs/zh-CN/summary/basic-summary.md @@ -24,4 +24,6 @@ specific language governing permissions and limitations under the License. --> -# Doris 基本概念 \ No newline at end of file +# Doris 基本概念 + +(TODO) \ No newline at end of file diff --git a/new-docs/zh-CN/summary/system-architecture.md b/docs/zh-CN/summary/system-architecture.md similarity index 96% rename from new-docs/zh-CN/summary/system-architecture.md rename to docs/zh-CN/summary/system-architecture.md index 14b7a58709..fc57b16dee 100644 --- a/new-docs/zh-CN/summary/system-architecture.md +++ b/docs/zh-CN/summary/system-architecture.md @@ -23,4 +23,7 @@ KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> -# Doris系统架构 \ No newline at end of file + +# Doris系统架构 + +(TODO) \ No newline at end of file diff --git a/new-docs/.markdownlint.yml b/new-docs/.markdownlint.yml deleted file mode 100644 index f01d39144f..0000000000 --- a/new-docs/.markdownlint.yml +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -{ - "default": true, - "MD013": false, -} diff --git a/new-docs/.vuepress/components/CaseList.vue b/new-docs/.vuepress/components/CaseList.vue deleted file mode 100644 index a899489e44..0000000000 --- a/new-docs/.vuepress/components/CaseList.vue +++ /dev/null @@ -1,105 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/new-docs/.vuepress/config.js b/new-docs/.vuepress/config.js deleted file mode 100644 index 1631a0134d..0000000000 --- a/new-docs/.vuepress/config.js +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Values would be replaced automatically during the travis' building -const BUILDING_BRANCH = process.env.BRANCH || '' -const ALGOLIA_API_KEY = process.env.ALGOLIA_API_KEY || '' -const ALGOLIA_INDEX_NAME = process.env.ALGOLIA_INDEX_NAME || '' - -function convertSidebar(list, path) { - if (list.length > 0) { - list.forEach((element, i) => { - if (element.children) { - convertSidebar(element.children, path + element.directoryPath) - delete element.directoryPath - } else { - list[i] = path + element - } - }); - } - return list -} - -function buildAlgoliaSearchConfig(lang) { - return { - apiKey: ALGOLIA_API_KEY, - indexName: ALGOLIA_INDEX_NAME, - algoliaOptions: { - facetFilters: ['lang:' + lang, 'version:' + BUILDING_BRANCH] - } - } -} - -module.exports = { - base: BUILDING_BRANCH.length > 0 ? 
'/' + BUILDING_BRANCH + '/' : '', - locales: { - '/en/': { - lang: 'en', - title: 'Apache Doris', - description: 'Apache Doris' - }, - '/zh-CN/': { - lang: 'zh-CN', - title: 'Apache Doris', - description: 'Apache Doris' - } - }, - head: [ - ['meta', { name: 'theme-color', content: '#3eaf7c' }], - ['meta', { name: 'apple-mobile-web-app-capable', content: 'yes' }], - ['meta', { name: 'apple-mobile-web-app-status-bar-style', content: 'black' }], - ['meta', { name: 'msapplication-TileColor', content: '#000000' }] - ], - title: 'Apache Doris', - description: 'Apache Doris', - themeConfig: { - title: 'Doris', - logo: '/images/doris-logo-only.png', - search: true, - smoothScroll: true, - searchMaxSuggestions: 10, - nextLinks: true, - prevLinks: true, - repo: 'apache/incubator-doris', - repoLabel: 'GitHub', - lastUpdated: 'Last Updated', - editLinks: true, - docsDir: 'docs', - docsBranch: BUILDING_BRANCH, - searchPlaceholder: 'Search', - locales: { - '/en/': { - algolia: buildAlgoliaSearchConfig('en'), - selectText: 'Languages', - label: 'English', - ariaLabel: 'Languages', - editLinkText: 'Edit this page on GitHub', - nav: [ - { - text: 'Download', link: '/en/downloads/downloads' - } - ], - sidebar: convertSidebar(require('./sidebar/en.js'), '/en/'), - article: { - metaTime: 'Date', - metaAuthor: 'Author', - paginationPre: 'Prev', - paginationNext: 'Next' - } - }, - '/zh-CN/': { - algolia: buildAlgoliaSearchConfig('zh-CN'), - selectText: 'Languages', - label: '简体中文', - editLinkText: '在 GitHub 上编辑此页', - nav: [ - { - text: '下载', link: '/zh-CN/downloads/downloads' - } - ], - sidebar: { - '/zh-CN/': convertSidebar(require('./sidebar/zh-CN.js'), '/zh-CN/') - }, - article: { - metaTime: '时间', - metaAuthor: '作者', - paginationPre: '上一页', - paginationNext: '下一页' - } - } - } - }, - plugins: [ - 'reading-progress', 'plugin-back-to-top', 'plugin-medium-zoom' - ] -}; diff --git a/new-docs/.vuepress/sidebar/en.js b/new-docs/.vuepress/sidebar/en.js deleted file mode 100644 index 44d79af711..0000000000 --- a/new-docs/.vuepress/sidebar/en.js +++ /dev/null @@ -1,947 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -module.exports = [ - { - title: "Getting Started", - directoryPath: "get-starting/", - initialOpenGroupIndex: -1, - children: [ - "get-starting" - ], - }, - { - title: "Doris Architecture", - directoryPath: "summary/", - initialOpenGroupIndex: -1, - children: [ - "basic-summary", - "system-architecture" - ], - }, - { - title: "Install and deploy", - directoryPath: "install/", - initialOpenGroupIndex: -1, - children: [ - "install-deploy", - { - title: "Compile", - directoryPath: "source-install/", - initialOpenGroupIndex: -1, - children: [ - "compilation", - "compilation-with-ldb-toolchain", - "compilation-arm" - ], - sidebarDepth: 2, - }, - ] - }, - { - title: "Table Design", - directoryPath: "data-table/", - initialOpenGroupIndex: -1, - children: [ - "data-model", - "data-partition", - "basic-usage", - "advance-usage", - "hit-the-rollup", - "best-practice", - { - title: "Index", - directoryPath: "index/", - initialOpenGroupIndex: -1, - children: [ - "bloomfilter", - "prefix-index", - "bitmap-index" - ], - }, - ], - }, - { - title: "Data Operate", - directoryPath: "data-operate/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Import", - directoryPath: "import/", - initialOpenGroupIndex: -1, - children: [ - "load-manual", - { - title: "Import Scenes", - directoryPath: "import-scenes/", - initialOpenGroupIndex: -1, - children: [ - "local-file-load", - "external-storage-load", - "kafka-load", - "external-table-load", - "jdbc-load", - "load-atomicity", - "load-data-convert", - "load-strict-mode", - ], - }, - { - title: "Import Way", - directoryPath: "import-way/", - initialOpenGroupIndex: -1, - children: [ - "binlog-load-manual", - "broker-load-manual", - "routine-load-manual", - "spark-load-manual", - "stream-load-manual", - "s3-load-manual", - "insert-into-manual", - "load-json-format", - ], - }, - ], - }, - { - title: "Export", - directoryPath: "export/", - initialOpenGroupIndex: -1, - children: [ - "export-manual", - "outfile", - "export_with_mysql_dump", - ], - }, - { - title: "Update and Delete", - directoryPath: "update-delete/", - initialOpenGroupIndex: -1, - children: [ - "update", - "sequence-column-manual", - "delete-manual", - "batch-delete-manual" - ], - }, - ], - }, - { - title: "Advanced usage", - directoryPath: "advanced/", - initialOpenGroupIndex: -1, - children: [ - "materialized-view", - { - title: "Alter Table", - directoryPath: "alter-table/", - initialOpenGroupIndex: -1, - children: [ - "schema-change", - "replace-table" - ], - }, - { - title: "Doris Partition", - directoryPath: "partition/", - initialOpenGroupIndex: -1, - children: [ - "dynamic-partition", - "table-temp-partition" - ], - }, - { - title: "Join Optimization", - directoryPath: "join-optimization/", - initialOpenGroupIndex: -1, - children: [ - "bucket-shuffle-join", - "colocation-join", - "runtime-filter" - ], - }, - { - title: "Date Cache", - directoryPath: "cache/", - initialOpenGroupIndex: -1, - children: [ - "partition-cache" - ], - }, - "broker", - "resource", - "orthogonal-bitmap-manual", - "variables", - "time-zone", - "small-file-mgr", - { - title: "Best Practice", - directoryPath: "best-practice/", - initialOpenGroupIndex: -1, - children: [ - "query-analysis", - "import-analysis", - "debug-log" - ] - } - ], - }, - { - title: "Ecosystem", - directoryPath: "ecosystem/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Expansion table", - directoryPath: "external-table/", - initialOpenGroupIndex: -1, - children: [ - "doris-on-es", - "odbc-of-doris", - "hive-of-doris", - 
"iceberg-of-doris" - ], - }, - "audit-plugin", - "flink-doris-connector", - "spark-doris-connector", - "datax", - "logstash", - { - title: "SeaTunnel", - directoryPath: "seatunnel/", - initialOpenGroupIndex: -1, - children: [ - "flink-sink", - "spark-sink" - ], - }, - { - title: "UDF", - directoryPath: "udf/", - initialOpenGroupIndex: -1, - children: [ - "native-user-defined-function", - "remote-user-defined-function", - "contribute-udf" - ], - }, - ], - }, - { - title: "SQL manual", - directoryPath: "sql-manual/", - initialOpenGroupIndex: -1, - children: [ - { - title: "SQL Functions", - directoryPath: "sql-functions/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Date Functions", - directoryPath: "date-time-functions/", - initialOpenGroupIndex: -1, - children: [ - "convert_tz", - "curdate", - "current_timestamp", - "curtime", - "date_add", - "date_format", - "date_sub", - "datediff", - "day", - "dayname", - "dayofmonth", - "dayofweek", - "dayofyear", - "from_days", - "from_unixtime", - "hour", - "makedate", - "minute", - "month", - "monthname", - "now", - "second", - "str_to_date", - "time_round", - "timediff", - "timestampadd", - "timestampdiff", - "to_days", - "unix_timestamp", - "utc_timestamp", - "week", - "weekday", - "weekofyear", - "year", - "yearweek", - ], - }, - { - title: "GIS Functions", - directoryPath: "spatial-functions/", - initialOpenGroupIndex: -1, - children: [ - "st_astext", - "st_circle", - "st_contains", - "st_distance_sphere", - "st_geometryfromtext", - "st_linefromtext", - "st_point", - "st_polygon", - "st_x", - "st_y", - ], - }, - { - title: "String Functions", - directoryPath: "string-functions/", - initialOpenGroupIndex: -1, - children: [ - "append_trailing_char_if_absent", - "ascii", - "bit_length", - "char_length", - "concat", - "concat_ws", - "ends_with", - "find_in_set", - "hex", - "instr", - "lcase", - "left", - "length", - "locate", - "lower", - "lpad", - "ltrim", - "money_format", - "null_or_empty", - "repeat", - "replace", - "reverse", - "right", - "rpad", - "split_part", - "starts_with", - "strleft", - "strright", - "substring", - "unhex", - { - title: "Fuzzy Match", - directoryPath: "like/", - initialOpenGroupIndex: -1, - children: [ - "like", - "not_like", - ], - }, - { - title: "Regular Match", - directoryPath: "regexp/", - initialOpenGroupIndex: -1, - children: [ - "regexp", - "regexp_extract", - "regexp_replace", - "not_regexp", - ], - }, - ], - }, - { - title: "Aggregate Functions", - directoryPath: "aggregate-functions/", - initialOpenGroupIndex: -1, - children: [ - "approx_count_distinct", - "avg", - "bitmap_union", - "count", - "group_concat", - "hll_union_agg", - "max", - "min", - "percentile", - "percentile_approx", - "stddev", - "stddev_samp", - "sum", - "topn", - "var_samp", - "variance", - ], - }, - { - title: "Bitmap Functions", - directoryPath: "bitmap-functions/", - initialOpenGroupIndex: -1, - children: [ - "bitmap_and", - "bitmap_contains", - "bitmap_empty", - "bitmap_from_string", - "bitmap_has_any", - "bitmap_has_all", - "bitmap_hash", - "bitmap_intersect", - "bitmap_or", - "bitmap_and_count", - "bitmap_or_count", - "bitmap_xor", - "bitmap_xor_count", - "bitmap_not", - "bitmap_and_not", - "bitmap_and_not_count", - "bitmap_subset_in_range", - "bitmap_subset_limit", - "sub_bitmap", - "bitmap_to_string", - "bitmap_union", - "bitmap_xor", - "to_bitmap", - "bitmap_max", - "orthogonal_bitmap_intersect", - "orthogonal_bitmap_intersect_count", - "orthogonal_bitmap_union_count", - ], - }, - { - title: "Bitwise Functions", - 
directoryPath: "bitwise-functions/", - initialOpenGroupIndex: -1, - children: [ - "bitand", - "bitor", - "bitxor", - "bitnot" - ], - }, - { - title: "Condition Functions", - directoryPath: "conditional-functions/", - children: [ - "case", - "coalesce", - "if", - "ifnull", - "nullif" - ], - }, - { - title: "JSON Functions", - directoryPath: "json-functions/", - initialOpenGroupIndex: -1, - children: [ - "get_json_double", - "get_json_int", - "get_json_string", - "json_array", - "json_object", - "json_quote", - ], - }, - { - title: "Hash Functions", - directoryPath: "hash-functions/", - initialOpenGroupIndex: -1, - children: ["murmur_hash3_32"], - }, - { - title: "Math Functions", - directoryPath: "math-functions/", - initialOpenGroupIndex: -1, - children: [ - "conv", - "pmod" - ], - }, - { - title: "Encryption Functions", - directoryPath: "encrypt-dixgest-functions/", - initialOpenGroupIndex: -1, - children: [ - "aes", - "md5", - "md5sum", - "sm4", - "sm3", - "sm3sum" - ], - }, - { - title: "Table Functions", - directoryPath: "table-functions/", - initialOpenGroupIndex: -1, - children: [ - "explode-bitmap", - "explode-split", - "explode-json-array" - ], - }, - "window-function", - "cast", - "digital-masking", - ], - }, - { - title: "SQL Reference", - directoryPath: "sql-reference-v2/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Account Management", - directoryPath: "Account-Management-Statements/", - initialOpenGroupIndex: -1, - children: [ - "CREATE-USER", - "CREATE-ROLE", - "DROP-ROLE", - "DROP-USER", - "GRANT", - "REVOKE", - "SET-PASSWORD", - "SET-PROPERTY", - "LDAP", - ], - }, - { - title: "Cluster management", - directoryPath: "Cluster-Management-Statements/", - initialOpenGroupIndex: -1, - children: [ - "ALTER-SYSTEM-ADD-BACKEND", - "ALTER-SYSTEM-ADD-FOLLOWER", - "ALTER-SYSTEM-ADD-OBSERVER", - "ALTER-SYSTEM-DECOMMISSION-BACKEND", - "ALTER-SYSTEM-DROP-BACKEND", - "ALTER-SYSTEM-DROP-FOLLOWER", - "ALTER-SYSTEM-DROP-OBSERVER", - "ALTER-SYSTEM-MODIFY-BROKER", - "CANCEL-ALTER-SYSTEM", - ], - }, - { - title: "DDL", - directoryPath: "Data-Definition-Statements/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Alter", - directoryPath: "Alter/", - initialOpenGroupIndex: -1, - children: [ - "ALTER-DATABASE", - "ALTER-SQL-BLOCK-RULE", - "ALTER-TABLE-COLUMN", - "ALTER-TABLE-PARTITION", - "ALTER-TABLE-PROPERTY", - "ALTER-TABLE-RENAME", - "ALTER-TABLE-REPLACE", - "ALTER-TABLE-ROLLUP", - "ALTER-VIEW", - "CANCEL-ALTER-TABLE", - ], - }, - { - title: "Backup and Restore", - directoryPath: "Backup-and-Restore/", - initialOpenGroupIndex: -1, - children: [ - "BACKUP", - "CANCEL-BACKUP", - "CANCEL-RESTORE", - "CREATE-REPOSITORY", - "DROP-REPOSITORY", - "RESTORE", - ], - }, - { - title: "Create", - directoryPath: "Create/", - initialOpenGroupIndex: -1, - children: [ - "CREATE-DATABASE", - "CREATE-ENCRYPT-KEY", - "CREATE-FILE", - "CREATE-FUNCTION", - "CREATE-INDEX", - "CREATE-MATERIALIZED-VIEW", - "CREATE-RESOURCE", - "CREATE-SQL-BLOCK-RULE", - "CREATE-TABLE-LIKE", - "CREATE-TABLE", - "CREATE-VIEW", - "CREATE-EXTERNAL-TABLE", - ], - }, - { - title: "Drop", - directoryPath: "Drop/", - initialOpenGroupIndex: -1, - children: [ - "DROP-DATABASE", - "DROP-ENCRYPT-KEY", - "DROP-FILE", - "DROP-FUNCTION", - "DROP-INDEX", - "DROP-MATERIALIZED-VIEW", - "DROP-RESOURCE", - "DROP-SQL-BLOCK-RULE", - "DROP-TABLE", - "TRUNCATE-TABLE", - ], - }, - ], - }, - { - title: "DML", - directoryPath: "Data-Manipulation-Statements/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Load", - 
directoryPath: "Load/", - initialOpenGroupIndex: -1, - children: [ - "ALTER-ROUTINE-LOAD", - "BROKER-LOAD", - "CANCEL-LOAD", - "CREATE-ROUTINE-LOAD", - "PAUSE-ROUTINE-LOAD", - "RESUME-ROUTINE-LOAD", - "STOP-ROUTINE-LOAD", - "STREAM-LOAD", - "PAUSE-SYNC-JOB", - "RESUME-SYNC-JOB", - "STOP-SYNC-JOB", - "CREATE-SYNC-JOB", - ], - }, - { - title: "Manipulation", - directoryPath: "Manipulation/", - initialOpenGroupIndex: -1, - children: [ - "DELETE", - "INSERT", - "UPDATE", - ], - }, - "OUTFILE" - ], - }, - { - title: "Database Administration", - directoryPath: "Database-Administration-Statements/", - initialOpenGroupIndex: -1, - children: [ - "ADMIN-CANCEL-REPAIR", - "ADMIN-CHECK-TABLET", - "ADMIN-REPAIR-TABLE", - "ADMIN-SET-CONFIG", - "ADMIN-SET-REPLICA-STATUS", - "ADMIN-SHOW-CONFIG", - "ADMIN-SHOW-REPLICA-DISTRIBUTION", - "ADMIN-SHOW-REPLICA-STATUS", - "ENABLE-FEATURE", - "INSTALL-PLUGIN", - "KILL", - "RECOVER", - "SET-VARIABLE", - "UNINSTALL-PLUGIN", - ], - }, - { - title: "Show", - directoryPath: "Show-Statements/", - initialOpenGroupIndex: -1, - children: [ - "SHOW-ALTER", - "SHOW-BACKENDS", - "SHOW-BACKUP", - "SHOW-BROKER", - "SHOW-CHARSET", - "SHOW-COLLATION", - "SHOW-COLUMNS", - "SHOW-CREATE-DATABASE", - "SHOW-CREATE-FUNCTION", - "SHOW-CREATE-ROUTINE-LOAD", - "SHOW-CREATE-TABLE", - "SHOW-DATA", - "SHOW-DATABASE-ID", - "SHOW-DATABASES", - "SHOW-DELETE", - "SHOW-DYNAMIC-PARTITION", - "SHOW-ENCRYPT-KEY", - "SHOW-ENGINES", - "SHOW-EVENTS", - "SHOW-EXPORT", - "SHOW-FRONTENDS", - "SHOW-FUNCTIONS", - "SHOW-GRANTS", - "SHOW-INDEX", - "SHOW-LOAD-PROFILE", - "SHOW-LOAD-WARNINGS", - "SHOW-LOAD", - "SHOW-MIGRATIONS", - "SHOW-OPEN-TABLES", - "SHOW-PARTITION-ID", - "SHOW-PARTITIONS", - "SHOW-PLUGINS", - "SHOW-PROC", - "SHOW-PROCEDURE", - "SHOW-PROCESSLIST", - "SHOW-PROPERTY", - "SHOW-QUERY-PROFILE", - "SHOW-REPOSITORIES", - "SHOW-RESOURCES", - "SHOW-RESTORE", - "SHOW-ROLES", - "SHOW-ROLLUP", - "SHOW-ROUTINE-LOAD-TASK", - "SHOW-ROUTINE-LOAD", - "SHOW-SMALL-FILES", - "SHOW-SNAPSHOT", - "SHOW-SQL-BLOCK-RULE", - "SHOW-STATUS", - "SHOW-STREAM-LOAD", - "SHOW-SYNC-JOB", - "SHOW-TABLE-ID", - "SHOW-TABLE-STATUS", - "SHOW-TABLET", - "SHOW-TRANSACTION", - "SHOW-TRIGGERS", - "SHOW-TRASH", - "SHOW-USER", - "SHOW-VARIABLES", - "SHOW-VIEW", - "SHOW-WARNING", - "SHOW-WHITE-LIST", - ], - }, - { - title: "Data Types", - directoryPath: "Data-Types/", - initialOpenGroupIndex: -1, - children: [ - "BIGINT", - "BITMAP", - "BOOLEAN", - "CHAR", - "DATE", - "DATETIME", - "DECIMAL", - "DOUBLE", - "FLOAT", - "HLL", - "INT", - "SMALLINT", - "STRING", - "TINYINT", - "VARCHAR", - ], - }, - { - title: "Utility", - directoryPath: "Utility-Statements/", - initialOpenGroupIndex: -1, - children: [ - "DESCRIBE", - "HELP", - "USE", - ], - }, - ], - }, - ], - }, - { - title: "Admin Manual", - directoryPath: "admin-manual/", - initialOpenGroupIndex: -1, - children: [ - { - title: "cluster management", - directoryPath: "cluster-management/", - initialOpenGroupIndex: -1, - children: [ - "upgrade", - "elastic-expansion", - "load-balancing" - ], - }, - { - title: "Data Admin", - directoryPath: "data-admin/", - initialOpenGroupIndex: -1, - children: [ - "backup", - "restore", - "delete-recover" - ], - }, - "sql-interception", - "query-profile", - "optimization", - { - title: "Maintenance and Monitor", - directoryPath: "maint-monitor/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Monitor Metrics", - directoryPath: "monitor-metrics/", - initialOpenGroupIndex: -1, - children: [ - "fe-metrics", - "be-metrics" - ], - }, - 
"disk-capacity", - "metadata-operation", - "tablet-meta-tool", - "tablet-repair-and-balance", - "tablet-restore-tool", - "monitor-alert", - "doris-error-code", - "be-olap-error-code" - ], - }, - { - title: "Config", - directoryPath: "config/", - initialOpenGroupIndex: -1, - children: [ - "fe-config", - "be-config", - "user-property" - ], - }, - { - title: "User Privilege and Ldap", - directoryPath: "privilege-ldap/", - initialOpenGroupIndex: -1, - children: [ - "user-privilege", - "ldap" - ], - }, - "multi-tenant", - { - title: "HTTP API", - directoryPath: "http-actions/", - initialOpenGroupIndex: -1, - children: [ - { - title: "FE", - directoryPath: "fe/", - initialOpenGroupIndex: -1, - children: [ - { - title: "MANAGER", - directoryPath: "manager/", - initialOpenGroupIndex: -1, - children: [ - "cluster-action", - "node-action", - "query-profile-action", - ], - }, - "backends-action", - "bootstrap-action", - "cancel-load-action", - "check-decommission-action", - "check-storage-type-action", - "config-action", - "connection-action", - "get-ddl-stmt-action", - "get-load-info-action", - "get-load-state", - "get-log-file-action", - "get-small-file", - "ha-action", - "hardware-info-action", - "health-action", - "log-action", - "logout-action", - "meta-action", - "meta-info-action", - "meta-replay-state-action", - "profile-action", - "query-detail-action", - "query-profile-action", - "row-count-action", - "session-action", - "set-config-action", - "show-data-action", - "show-meta-info-action", - "show-proc-action", - "show-runtime-info-action", - "statement-execution-action", - "system-action", - "table-query-plan-action", - "table-row-count-action", - "table-schema-action", - "upload-action", - ], - }, - "cancel-label", - "check-reset-rpc-cache", - "compaction-action", - "connection-action", - "fe-get-log-file", - "get-load-state", - "get-tablets", - "profile-action", - "query-detail-action", - "restore-tablet", - "show-data-action", - "tablet-migration-action", - "tablets_distribution", - ], - sidebarDepth: 1, - }, - ], - }, - { - title: "FQA", - directoryPath: "faq/", - initialOpenGroupIndex: -1, - children: [ - "install-faq", - "data-faq", - "sql-faq" - ], - }, - { - title: "Benchmark", - directoryPath: "benchmark/", - initialOpenGroupIndex: -1, - children: [ - "ssb", - "tpc-h" - ], - } -]; diff --git a/new-docs/.vuepress/sidebar/zh-CN.js b/new-docs/.vuepress/sidebar/zh-CN.js deleted file mode 100644 index 54f6264240..0000000000 --- a/new-docs/.vuepress/sidebar/zh-CN.js +++ /dev/null @@ -1,947 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -module.exports = [ - { - title: "快速开始", - directoryPath: "get-starting/", - initialOpenGroupIndex: -1, - children: [ - "get-starting" - ], - }, - { - title: "Doris架构", - directoryPath: "summary/", - initialOpenGroupIndex: -1, - children: [ - "basic-summary", - "system-architecture" - ], - }, - { - title: "安装部署", - directoryPath: "install/", - initialOpenGroupIndex: -1, - children: [ - "install-deploy", - { - title: "源码编译", - directoryPath: "source-install/", - initialOpenGroupIndex: -1, - children: [ - "compilation", - "compilation-with-ldb-toolchain", - "compilation-arm" - ], - sidebarDepth: 2, - }, - ] - }, - { - title: "数据表设计", - directoryPath: "data-table/", - initialOpenGroupIndex: -1, - children: [ - "data-model", - "data-partition", - "basic-usage", - "advance-usage", - "hit-the-rollup", - "best-practice", - { - title: "索引", - directoryPath: "index/", - initialOpenGroupIndex: -1, - children: [ - "bloomfilter", - "prefix-index", - "bitmap-index" - ], - }, - ], - }, - { - title: "数据操作", - directoryPath: "data-operate/", - initialOpenGroupIndex: -1, - children: [ - { - title: "数据导入", - directoryPath: "import/", - initialOpenGroupIndex: -1, - children: [ - "load-manual", - { - title: "按场景导入", - directoryPath: "import-scenes/", - initialOpenGroupIndex: -1, - children: [ - "local-file-load", - "external-storage-load", - "kafka-load", - "external-table-load", - "jdbc-load", - "load-atomicity", - "load-data-convert", - "load-strict-mode", - ], - }, - { - title: "按方式导入", - directoryPath: "import-way/", - initialOpenGroupIndex: -1, - children: [ - "binlog-load-manual", - "broker-load-manual", - "routine-load-manual", - "spark-load-manual", - "stream-load-manual", - "s3-load-manual", - "insert-into-manual", - "load-json-format", - ], - }, - ], - }, - { - title: "Export导出", - directoryPath: "export/", - initialOpenGroupIndex: -1, - children: [ - "export-manual", - "outfile", - "export_with_mysql_dump", - ], - }, - { - title: "数据更新及删除", - directoryPath: "update-delete/", - initialOpenGroupIndex: -1, - children: [ - "update", - "sequence-column-manual", - "delete-manual", - "batch-delete-manual" - ], - }, - ], - }, - { - title: "进阶使用", - directoryPath: "advanced/", - initialOpenGroupIndex: -1, - children: [ - "materialized-view", - { - title: "表结构变更", - directoryPath: "alter-table/", - initialOpenGroupIndex: -1, - children: [ - "schema-change", - "replace-table" - ], - }, - { - title: "Doris表分区", - directoryPath: "partition/", - initialOpenGroupIndex: -1, - children: [ - "dynamic-partition", - "table-tmp-partition" - ], - }, - { - title: "Join优化", - directoryPath: "join-optimization/", - initialOpenGroupIndex: -1, - children: [ - "bucket-shuffle-join", - "colocation-join", - "runtime-filter" - ], - }, - { - title: "数据缓存", - directoryPath: "cache/", - initialOpenGroupIndex: -1, - children: [ - "partition-cache" - ], - }, - "broker", - "resource", - "orthogonal-bitmap-manual", - "variables", - "time-zone", - "small-file-mgr", - { - title: "最佳实践", - directoryPath: "best-practice/", - initialOpenGroupIndex: -1, - children: [ - "query-analysis", - "import-analysis", - "debug-log" - ], - } - ], - }, - { - title: "生态扩展", - directoryPath: "ecosystem/", - initialOpenGroupIndex: -1, - children: [ - { - title: "扩展表", - directoryPath: "external-table/", - initialOpenGroupIndex: -1, - children: [ - "doris-on-es", - "odbc-of-doris", - "hive-of-doris", - "iceberg-of-doris" - ], - }, - "audit-plugin", - "flink-doris-connector", - "spark-doris-connector", - "datax", - "logstash", - { - title: "SeaTunnel", 
- directoryPath: "seatunnel/", - initialOpenGroupIndex: -1, - children: [ - "flink-sink", - "spark-sink" - ], - }, - { - title: "自定义函数", - directoryPath: "udf/", - initialOpenGroupIndex: -1, - children: [ - "native-user-defined-function", - "remote-user-defined-function", - "contribute-udf" - ], - }, - ], - }, - { - title: "SQL手册", - directoryPath: "sql-manual/", - initialOpenGroupIndex: -1, - children: [ - { - title: "SQL 函数", - directoryPath: "sql-functions/", - initialOpenGroupIndex: -1, - children: [ - { - title: "日期函数", - directoryPath: "date-time-functions/", - initialOpenGroupIndex: -1, - children: [ - "convert_tz", - "curdate", - "current_timestamp", - "curtime", - "date_add", - "date_format", - "date_sub", - "datediff", - "day", - "dayname", - "dayofmonth", - "dayofweek", - "dayofyear", - "from_days", - "from_unixtime", - "hour", - "makedate", - "minute", - "month", - "monthname", - "now", - "second", - "str_to_date", - "time_round", - "timediff", - "timestampadd", - "timestampdiff", - "to_days", - "unix_timestamp", - "utc_timestamp", - "week", - "weekday", - "weekofyear", - "year", - "yearweek", - ], - }, - { - title: "地理位置函数", - directoryPath: "spatial-functions/", - initialOpenGroupIndex: -1, - children: [ - "st_astext", - "st_circle", - "st_contains", - "st_distance_sphere", - "st_geometryfromtext", - "st_linefromtext", - "st_point", - "st_polygon", - "st_x", - "st_y", - ], - }, - { - title: "字符串函数", - directoryPath: "string-functions/", - initialOpenGroupIndex: -1, - children: [ - "append_trailing_char_if_absent", - "ascii", - "bit_length", - "char_length", - "concat", - "concat_ws", - "ends_with", - "find_in_set", - "hex", - "instr", - "lcase", - "left", - "length", - "locate", - "lower", - "lpad", - "ltrim", - "money_format", - "null_or_empty", - "repeat", - "replace", - "reverse", - "right", - "rpad", - "split_part", - "starts_with", - "strleft", - "strright", - "substring", - "unhex", - { - title: "模糊匹配", - directoryPath: "like/", - initialOpenGroupIndex: -1, - children: [ - "like", - "not_like", - ], - }, - { - title: "正则匹配", - directoryPath: "regexp/", - initialOpenGroupIndex: -1, - children: [ - "regexp", - "regexp_extract", - "regexp_replace", - "not_regexp", - ], - }, - ], - }, - { - title: "聚合函数", - directoryPath: "aggregate-functions/", - initialOpenGroupIndex: -1, - children: [ - "approx_count_distinct", - "avg", - "bitmap_union", - "count", - "group_concat", - "hll_union_agg", - "max", - "min", - "percentile", - "percentile_approx", - "stddev", - "stddev_samp", - "sum", - "topn", - "var_samp", - "variance", - ], - }, - { - title: "bitmap函数", - directoryPath: "bitmap-functions/", - initialOpenGroupIndex: -1, - children: [ - "bitmap_and", - "bitmap_contains", - "bitmap_empty", - "bitmap_from_string", - "bitmap_has_any", - "bitmap_has_all", - "bitmap_hash", - "bitmap_intersect", - "bitmap_or", - "bitmap_and_count", - "bitmap_or_count", - "bitmap_xor", - "bitmap_xor_count", - "bitmap_not", - "bitmap_and_not", - "bitmap_and_not_count", - "bitmap_subset_in_range", - "bitmap_subset_limit", - "sub_bitmap", - "bitmap_to_string", - "bitmap_union", - "bitmap_xor", - "to_bitmap", - "bitmap_max", - "orthogonal_bitmap_intersect", - "orthogonal_bitmap_intersect_count", - "orthogonal_bitmap_union_count", - ], - }, - { - title: "bitwise函数", - directoryPath: "bitwise-functions/", - initialOpenGroupIndex: -1, - children: [ - "bitand", - "bitor", - "bitxor", - "bitnot" - ], - }, - { - title: "条件函数", - directoryPath: "conditional-functions/", - children: [ - "case", - "coalesce", - 
"if", - "ifnull", - "nullif" - ], - }, - { - title: "json 函数", - directoryPath: "json-functions/", - initialOpenGroupIndex: -1, - children: [ - "get_json_double", - "get_json_int", - "get_json_string", - "json_array", - "json_object", - "json_quote", - ], - }, - { - title: "Hash函数", - directoryPath: "hash-functions/", - initialOpenGroupIndex: -1, - children: ["murmur_hash3_32"], - }, - { - title: "数学函数", - directoryPath: "math-functions/", - initialOpenGroupIndex: -1, - children: [ - "conv", - "pmod" - ], - }, - { - title: "加密和信息摘要函数", - directoryPath: "encrypt-digest-functions/", - initialOpenGroupIndex: -1, - children: [ - "aes", - "md5", - "md5sum", - "sm4", - "sm3", - "sm3sum" - ], - }, - { - title: "table functions", - directoryPath: "table-functions/", - initialOpenGroupIndex: -1, - children: [ - "explode-bitmap", - "explode-split", - "explode-json-array" - ], - }, - "window-function", - "cast", - "digital-masking", - ], - }, - { - title: "SQL手册", - directoryPath: "sql-reference-v2/", - initialOpenGroupIndex: -1, - children: [ - { - title: "账户管理", - directoryPath: "Account-Management-Statements/", - initialOpenGroupIndex: -1, - children: [ - "CREATE-USER", - "CREATE-ROLE", - "DROP-ROLE", - "DROP-USER", - "GRANT", - "REVOKE", - "SET-PASSWORD", - "SET-PROPERTY", - "LDAP", - ], - }, - { - title: "集群管理", - directoryPath: "Cluster-Management-Statements/", - initialOpenGroupIndex: -1, - children: [ - "ALTER-SYSTEM-ADD-BACKEND", - "ALTER-SYSTEM-ADD-FOLLOWER", - "ALTER-SYSTEM-ADD-OBSERVER", - "ALTER-SYSTEM-DECOMMISSION-BACKEND", - "ALTER-SYSTEM-DROP-BACKEND", - "ALTER-SYSTEM-DROP-FOLLOWER", - "ALTER-SYSTEM-DROP-OBSERVER", - "ALTER-SYSTEM-MODIFY-BROKER", - "CANCEL-ALTER-SYSTEM", - ], - }, - { - title: "DDL", - directoryPath: "Data-Definition-Statements/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Alter", - directoryPath: "Alter/", - initialOpenGroupIndex: -1, - children: [ - "ALTER-DATABASE", - "ALTER-SQL-BLOCK-RULE", - "ALTER-TABLE-COLUMN", - "ALTER-TABLE-PARTITION", - "ALTER-TABLE-PROPERTY", - "ALTER-TABLE-RENAME", - "ALTER-TABLE-REPLACE", - "ALTER-TABLE-ROLLUP", - "ALTER-VIEW", - "CANCEL-ALTER-TABLE", - ], - }, - { - title: "备份及恢复", - directoryPath: "Backup-and-Restore/", - initialOpenGroupIndex: -1, - children: [ - "BACKUP", - "CANCEL-BACKUP", - "CANCEL-RESTORE", - "CREATE-REPOSITORY", - "DROP-REPOSITORY", - "RESTORE", - ], - }, - { - title: "Create", - directoryPath: "Create/", - initialOpenGroupIndex: -1, - children: [ - "CREATE-DATABASE", - "CREATE-ENCRYPT-KEY", - "CREATE-FILE", - "CREATE-FUNCTION", - "CREATE-INDEX", - "CREATE-MATERIALIZED-VIEW", - "CREATE-RESOURCE", - "CREATE-SQL-BLOCK-RULE", - "CREATE-TABLE-LIKE", - "CREATE-TABLE", - "CREATE-VIEW", - "CREATE-EXTERNAL-TABLE", - ], - }, - { - title: "Drop", - directoryPath: "Drop/", - initialOpenGroupIndex: -1, - children: [ - "DROP-DATABASE", - "DROP-ENCRYPT-KEY", - "DROP-FILE", - "DROP-FUNCTION", - "DROP-INDEX", - "DROP-MATERIALIZED-VIEW", - "DROP-RESOURCE", - "DROP-SQL-BLOCK-RULE", - "DROP-TABLE", - "TRUNCATE-TABLE", - ], - }, - ], - }, - { - title: "DML", - directoryPath: "Data-Manipulation-Statements/", - initialOpenGroupIndex: -1, - children: [ - { - title: "Load", - directoryPath: "Load/", - initialOpenGroupIndex: -1, - children: [ - "ALTER-ROUTINE-LOAD", - "BROKER-LOAD", - "CANCEL-LOAD", - "CREATE-ROUTINE-LOAD", - "PAUSE-ROUTINE-LOAD", - "RESUME-ROUTINE-LOAD", - "STOP-ROUTINE-LOAD", - "STREAM-LOAD", - "PAUSE-SYNC-JOB", - "RESUME-SYNC-JOB", - "STOP-SYNC-JOB", - "CREATE-SYNC-JOB", - ], - }, - { - title: "操作", - 
directoryPath: "Manipulation/", - initialOpenGroupIndex: -1, - children: [ - "DELETE", - "INSERT", - "UPDATE", - ], - }, - "OUTFILE" - ], - }, - { - title: "数据库管理", - directoryPath: "Database-Administration-Statements/", - initialOpenGroupIndex: -1, - children: [ - "ADMIN-CANCEL-REPAIR", - "ADMIN-CHECK-TABLET", - "ADMIN-REPAIR-TABLE", - "ADMIN-SET-CONFIG", - "ADMIN-SET-REPLICA-STATUS", - "ADMIN-SHOW-CONFIG", - "ADMIN-SHOW-REPLICA-DISTRIBUTION", - "ADMIN-SHOW-REPLICA-STATUS", - "ENABLE-FEATURE", - "INSTALL-PLUGIN", - "KILL", - "RECOVER", - "SET-VARIABLE", - "UNINSTALL-PLUGIN", - ], - }, - { - title: "Show", - directoryPath: "Show-Statements/", - initialOpenGroupIndex: -1, - children: [ - "SHOW-ALTER", - "SHOW-BACKENDS", - "SHOW-BACKUP", - "SHOW-BROKER", - "SHOW-CHARSET", - "SHOW-COLLATION", - "SHOW-COLUMNS", - "SHOW-CREATE-DATABASE", - "SHOW-CREATE-FUNCTION", - "SHOW-CREATE-ROUTINE-LOAD", - "SHOW-CREATE-TABLE", - "SHOW-DATA", - "SHOW-DATABASE-ID", - "SHOW-DATABASES", - "SHOW-DELETE", - "SHOW-DYNAMIC-PARTITION", - "SHOW-ENCRYPT-KEY", - "SHOW-ENGINES", - "SHOW-EVENTS", - "SHOW-EXPORT", - "SHOW-FRONTENDS", - "SHOW-FUNCTIONS", - "SHOW-GRANTS", - "SHOW-INDEX", - "SHOW-LOAD-PROFILE", - "SHOW-LOAD-WARNINGS", - "SHOW-LOAD", - "SHOW-MIGRATIONS", - "SHOW-OPEN-TABLES", - "SHOW-PARTITION-ID", - "SHOW-PARTITIONS", - "SHOW-PLUGINS", - "SHOW-PROC", - "SHOW-PROCEDURE", - "SHOW-PROCESSLIST", - "SHOW-PROPERTY", - "SHOW-QUERY-PROFILE", - "SHOW-REPOSITORIES", - "SHOW-RESOURCES", - "SHOW-RESTORE", - "SHOW-ROLES", - "SHOW-ROLLUP", - "SHOW-ROUTINE-LOAD-TASK", - "SHOW-ROUTINE-LOAD", - "SHOW-SMALL-FILES", - "SHOW-SNAPSHOT", - "SHOW-SQL-BLOCK-RULE", - "SHOW-STATUS", - "SHOW-STREAM-LOAD", - "SHOW-SYNC-JOB", - "SHOW-TABLE-ID", - "SHOW-TABLE-STATUS", - "SHOW-TABLET", - "SHOW-TRANSACTION", - "SHOW-TRIGGERS", - "SHOW-TRASH", - "SHOW-USER", - "SHOW-VARIABLES", - "SHOW-VIEW", - "SHOW-WARNING", - "SHOW-WHITE-LIST", - ], - }, - { - title: "数据类型", - directoryPath: "Data-Types/", - initialOpenGroupIndex: -1, - children: [ - "BIGINT", - "BITMAP", - "BOOLEAN", - "CHAR", - "DATE", - "DATETIME", - "DECIMAL", - "DOUBLE", - "FLOAT", - "HLL", - "INT", - "SMALLINT", - "STRING", - "TINYINT", - "VARCHAR", - ], - }, - { - title: "辅助命令", - directoryPath: "Utility-Statements/", - initialOpenGroupIndex: -1, - children: [ - "DESCRIBE", - "HELP", - "USE" - ], - }, - ], - }, - ], - }, - { - title: "管理手册", - directoryPath: "admin-manual/", - initialOpenGroupIndex: -1, - children: [ - { - title: "集群管理", - directoryPath: "cluster-management/", - initialOpenGroupIndex: -1, - children: [ - "upgrade", - "elastic-expansion", - "load-balancing" - ], - }, - { - title: "数据管理", - directoryPath: "data-admin/", - initialOpenGroupIndex: -1, - children: [ - "backup", - "restore", - "delete-recover" - ], - }, - "sql-interception", - "query-profile", - "optimization", - { - title: "运维监控", - directoryPath: "maint-monitor/", - initialOpenGroupIndex: -1, - children: [ - { - title: "监控指标", - directoryPath: "monitor-metrics/", - initialOpenGroupIndex: -1, - children: [ - "fe-metrics", - "be-metrics" - ], - }, - "disk-capacity", - "metadata-operation", - "tablet-meta-tool", - "tablet-repair-and-balance", - "tablet-restore-tool", - "monitor-alert", - "doris-error-code", - "be-olap-error-code" - ], - }, - { - title: "配置管理", - directoryPath: "config/", - initialOpenGroupIndex: -1, - children: [ - "fe-config", - "be-config", - "user-property" - ], - }, - { - title: "用户权限及认证", - directoryPath: "privilege-ldap/", - initialOpenGroupIndex: -1, - children: [ - 
"user-privilege", - "ldap" - ], - }, - "multi-tenant", - { - title: "HTTP API", - directoryPath: "http-actions/", - initialOpenGroupIndex: -1, - children: [ - { - title: "FE", - directoryPath: "fe/", - initialOpenGroupIndex: -1, - children: [ - { - title: "MANAGER", - directoryPath: "manager/", - initialOpenGroupIndex: -1, - children: [ - "cluster-action", - "node-action", - "query-profile-action", - ], - }, - "backends-action", - "bootstrap-action", - "cancel-load-action", - "check-decommission-action", - "check-storage-type-action", - "config-action", - "connection-action", - "get-ddl-stmt-action", - "get-load-info-action", - "get-load-state", - "get-log-file-action", - "get-small-file", - "ha-action", - "hardware-info-action", - "health-action", - "log-action", - "logout-action", - "meta-action", - "meta-info-action", - "meta-replay-state-action", - "profile-action", - "query-detail-action", - "query-profile-action", - "row-count-action", - "session-action", - "set-config-action", - "show-data-action", - "show-meta-info-action", - "show-proc-action", - "show-runtime-info-action", - "statement-execution-action", - "system-action", - "table-query-plan-action", - "table-row-count-action", - "table-schema-action", - "upload-action", - ], - }, - "cancel-label", - "check-reset-rpc-cache", - "compaction-action", - "connection-action", - "fe-get-log-file", - "get-load-state", - "get-tablets", - "profile-action", - "query-detail-action", - "restore-tablet", - "show-data-action", - "tablet-migration-action", - "tablets_distribution", - ], - sidebarDepth: 1, - }, - ], - }, - { - title: "常见问题", - directoryPath: "faq/", - initialOpenGroupIndex: -1, - children: [ - "install-faq", - "data-faq", - "sql-faq" - ], - }, - { - title: "性能测试", - directoryPath: "benchmark/", - initialOpenGroupIndex: -1, - children: [ - "ssb", - "tpc-h" - ], - } -]; diff --git a/new-docs/.vuepress/theme/components/Footer.vue b/new-docs/.vuepress/theme/components/Footer.vue deleted file mode 100644 index 1f58cc60a9..0000000000 --- a/new-docs/.vuepress/theme/components/Footer.vue +++ /dev/null @@ -1,89 +0,0 @@ - - - - - diff --git a/new-docs/.vuepress/theme/components/Home.vue b/new-docs/.vuepress/theme/components/Home.vue deleted file mode 100644 index 54945dc81c..0000000000 --- a/new-docs/.vuepress/theme/components/Home.vue +++ /dev/null @@ -1,454 +0,0 @@ - - - - - - - \ No newline at end of file diff --git a/new-docs/.vuepress/theme/index.js b/new-docs/.vuepress/theme/index.js deleted file mode 100644 index d882e00623..0000000000 --- a/new-docs/.vuepress/theme/index.js +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -module.exports = { - extend: "@vuepress/theme-default" -}; diff --git a/new-docs/.vuepress/theme/layouts/Article.vue b/new-docs/.vuepress/theme/layouts/Article.vue deleted file mode 100644 index e23c358893..0000000000 --- a/new-docs/.vuepress/theme/layouts/Article.vue +++ /dev/null @@ -1,71 +0,0 @@ - - - - - \ No newline at end of file diff --git a/new-docs/.vuepress/theme/layouts/ArticleList.vue b/new-docs/.vuepress/theme/layouts/ArticleList.vue deleted file mode 100644 index aba94a7875..0000000000 --- a/new-docs/.vuepress/theme/layouts/ArticleList.vue +++ /dev/null @@ -1,141 +0,0 @@ - - - - - \ No newline at end of file diff --git a/new-docs/.vuepress/theme/layouts/Layout.vue b/new-docs/.vuepress/theme/layouts/Layout.vue deleted file mode 100644 index 98ad313485..0000000000 --- a/new-docs/.vuepress/theme/layouts/Layout.vue +++ /dev/null @@ -1,57 +0,0 @@ - - - - \ No newline at end of file diff --git a/new-docs/.vuepress/theme/styles/index.styl b/new-docs/.vuepress/theme/styles/index.styl deleted file mode 100644 index 52c89d31e0..0000000000 --- a/new-docs/.vuepress/theme/styles/index.styl +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/* Override style of sidebar's sub-direcotry */ -.sidebar-group.is-sub-group > .sidebar-heading:not(.clickable) { - opacity: 1 !important; -} diff --git a/new-docs/.vuepress/vuepress.textClipping b/new-docs/.vuepress/vuepress.textClipping deleted file mode 100644 index d943526db7feb3c0bc00054f44a1da655138f3e8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 199 zcmYc)$jK}&F)+Bu$P^J8;;HMBSdw^|nT3^&og-dAB{@G=FR`E?CsnVcC^03oBr`uR zF-JEyF - -# Doris Document - -[Vuepress](https://github.com/vuejs/vuepress.git) is used as our document site generator. Configurations are in `./docs/.vuepress` folder. - -## Getting Started - -Download and install [nodejs](http://nodejs.cn/download/) - -```bash -npm config set registry https://registry.npm.taobao.org // Only if you are in Mainland China. -cd docs && npm install -npm run dev -``` - -Open your browser and navigate to `localhost:8080/en/` or `localhost:8080/zh-CN/`. - -## Docs' Directories - -```bash - . - ├─ docs/ - │ ├─ .vuepress - │ │ ├─ dist // Built site files. - │ │ ├─ public // Assets - │ │ ├─ sidebar // Side bar configurations. - │ │ │ ├─ en.js - │ │ │ └─ zh-CN.js - │ ├─ theme // Global styles and customizations. - │ └─ config.js // Vuepress configurations. - ├─ zh-CN/ - │ ├─ xxxx.md - │ └─ README.md // Will be rendered as entry page. - └─ en/ - ├─ one.md - └─ README.md // Will be rendered as entry page. -``` - -## Start Writing - -1. Write markdown files in multi languages and put them in separated folders `./en/` and `./zh-CN/`. 
**But they should be with the same name.** - - ```bash - . - ├─ en/ - │ ├─ one.md - │ └─ two.md - └─ zh-CN/ - │ ├─ one.md - │ └─ two.md - ``` - -2. Frontmatters like below should always be on the top of each file: - - ```markdown - --- - { - "title": "Backup and Recovery", // sidebar title - "language": "en" // writing language - } - --- - ``` - -3. Assets are in `.vuepress/public/`. - - Assuming that there exists a png `.vuepress/public/images/image_x.png`, then it can be used like: - - ```markdown - ![alter text](/images/image_x.png) - ``` - -4. Remember to update the sidebar configurations in `.vuepress/sidebar/` after adding a new file or a folder. - - Assuming that the directories are: - - ```bash - . - ├─ en/ - │ ├─ subfolder - │ │ ├─ one.md - │ │ └─ two.md - │ └─ three.md - └─ zh-CN/ - ├─ subfolder - │ ├─ one.md - │ └─ two.md - └─ three.md - ``` - - Then the sidebar configurations would be like: - - ```javascript - // .vuepress/sidebar/en.js` - module.exports = [ - { - title: "subfolder name", - directoryPath: "subfolder/", - children: ["one", "two"] - }, - "three" - ] - ``` - - ```javascript - // .vuepress/sidebar/zh-CN.js - module.exports = [ - { - title: "文件夹名称", - directoryPath: "subfolder/", - children: ["one", "two"] - }, - "three" - ] - ``` - -5. Run `npm run lint` before starting a PR. - - Surely that there will be lots of error logs if the mardown files are not following the rules, and these logs will all be printed in the console: - -```shell - -en/administrator-guide/alter-table/alter-table-bitmap-index.md:92 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: " ```"] -en/administrator-guide/alter-table/alter-table-rollup.md:45 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"] -en/administrator-guide/alter-table/alter-table-rollup.md:77 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"] -en/administrator-guide/alter-table/alter-table-rollup.md:178 MD046/code-block-style Code block style [Expected: fenced; Actual: indented] -en/administrator-guide/alter-table/alter-table-schema-change.md:50 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"] -en/administrator-guide/alter-table/alter-table-schema-change.md:82 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"] -en/administrator-guide/alter-table/alter-table-schema-change.md:127 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"] -en/administrator-guide/alter-table/alter-table-schema-change.md:144 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"] -en/administrator-guide/alter-table/alter-table-schema-change.md:153 MD040/fenced-code-language Fenced code blocks should have a language specified [Context: "```"] -en/administrator-guide/alter-table/alter-table-schema-change.md:199 MD046/code-block-style Code block style [Expected: fenced; Actual: indented] -en/administrator-guide/backup-restore.md:45:1 MD029/ol-prefix Ordered list item prefix [Expected: 1; Actual: 2; Style: 1/1/1] -en/administrator-guide/backup-restore.md:57:1 MD029/ol-prefix Ordered list item prefix [Expected: 1; Actual: 2; Style: 1/1/1] -en/administrator-guide/backup-restore.md:61:1 MD029/ol-prefix Ordered list item prefix [Expected: 1; Actual: 3; Style: 1/1/1] -npm ERR! code ELIFECYCLE -npm ERR! errno 1 -npm ERR! 
docs@ lint: `markdownlint '**/*.md' -f` -npm ERR! Exit status 1 -npm ERR! -npm ERR! Failed at the docs@ lint script. - -``` - -## FullText search - -We use [Algolia DocSearch](https://docsearch.algolia.com/) as our fulltext search engine. - -One thing we need to do is that [Config.json From DocSearch](https://github.com/algolia/docsearch-configs/blob/master/configs/apache_doris.json) should be updated if a new language or branch is created. - -For more detail of the docsearch's configuration, please refer to [Configuration of DocSearch](https://docsearch.algolia.com/docs/config-file) - -## Deployment - -Just start a PR, and all things will be done automatically. - -## What Travis Does - -Once a PR accepted, travis ci will be triggered to build and deploy the whole website within its own branch. Here is what `.travis.yml` does: - -1. Prepare nodejs and vuepress enviorment. - -2. Use current branch's name as the relative url path in `.vuepress/config.js`(which is the `base` property). - -3. Build the documents into a website all by vuepress. - -4. Fetch asf-site repo to local directory, and copy `.vupress/dist/` into `{BRANCH}/`. - -5. Push the new site to asf-site repo with `GitHub Token`(which is preset in Travis console as a variable used in .travis.yml). - -## asf-site repository - -Finally the asf-site repository will be like: - -```bash -. -├─ master/ -│ ├─ en/ -│ │ ├─ subfolder -│ │ │ ├─ one.md -│ │ └─ three.md -│ └─ zh-CN/ -│ ├─ subfolder -│ │ ├─ one.md -│ └─ three.md -├─ incubating-0.11/ -│ ├─ en/ -│ │ ├─ subfolder -│ │ │ ├─ one.md -│ │ └─ three.md -│ └─ zh-CN/ -│ ├─ subfolder -│ │ ├─ one.md -│ └─ three.md -├─ index.html // user entry, and auto redirected to master folder -└─ versions.json // all versions that can be seleted on the website are defined here -``` - -And the `versions.json` is like: - -```json -{ - "en": [ - { - "text": "Versions", // dropdown label - "items": [ - { - "text": "master", // dropdown-item label - "link": "/../master/en/installing/compilation.html", // entry page for this version - "target": "_blank" - }, - { - "text": "branch-0.11", - "link": "/../branch-0.11/en/installing/compilation.html", - "target": "_blank" - } - ] - } - ], - "zh-CN": [ - { - "text": "版本", - "items": [ - { - "text": "master", - "link": "/../master/zh-CN/installing/compilation.html", - "target": "_blank" - }, - { - "text": "branch-0.11", - "link": "/../branch-0.11/zh-CN/installing/compilation.html", - "target": "_blank" - } - ] - } - ] -} -``` diff --git a/new-docs/build_help_zip.sh b/new-docs/build_help_zip.sh deleted file mode 100755 index 82fc8761dc..0000000000 --- a/new-docs/build_help_zip.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -############################################################## -# This script is used to build help doc zip file -############################################################## - -#!/bin/bash - -set -eo pipefail - -ROOT=`dirname "$0"` -ROOT=`cd "$ROOT"; pwd` - -BUILD_DIR=build -HELP_DIR=contents -HELP_ZIP_FILE=help-resource.zip -SQL_REF_DOC_DIR=zh-CN/sql-reference/ - -cd $ROOT -rm -rf $BUILD_DIR $HELP_DIR $HELP_ZIP_FILE -mkdir -p $BUILD_DIR $HELP_DIR - -cp -r $SQL_REF_DOC_DIR/* $HELP_DIR/ - -zip -r $HELP_ZIP_FILE $HELP_DIR -mv $HELP_ZIP_FILE $BUILD_DIR/ - - diff --git a/new-docs/en/README.md b/new-docs/en/README.md deleted file mode 100644 index d5ddba4ad6..0000000000 --- a/new-docs/en/README.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -home: true -heroImage: /images/home/banner-stats.png -heroBgImage: /images/home/hero-bg.png -heroText: - - Welcome to - - Apache Doris -tagline: A fast MPP database for all modern analytics on big data. -structure: - title: Apache Doris - subTitle: - descriptions: - - Apache Doris is a modern MPP analytical database product. It can provide sub-second queries and efficient real-time data analysis. With it's distributed architecture, up to 10PB level datasets will be well supported and easy to operate. - - Apache Doris can meet various data analysis demands, including history data reports, real-time data analysis, interactive data analysis, and exploratory data analysis. Make your data analysis easier! 
- image: /images/home/structure-fresh.png - actionText: Learn More - actionLink: /en/getting-started/basic-usage -features: - title: Apache Doris Core Features - subTitle: - list: - - title: Modern MPP architecture - icon: /images/home/struct.png - - title: Getting result of a query within one second - icon: /images/home/clock.png - - title: Support standard SQL language, compatible with MySQL protocol - icon: /images/home/sql.png - - title: Vectorized SQL executor - icon: /images/home/program.png - - title: Effective data model for aggregation - icon: /images/home/aggr.png - - title: Rollup, novel pre-computation mechanism - icon: /images/home/rollup.png - - title: High performance, high availability, high reliability - icon: /images/home/cpu.png - - title: easy for operation, Elastic data warehouse for big data - icon: /images/home/dev.png -cases: - title: Apache Doris Users - subTitle: - list: - - logo: /images/home/logo-meituan.png - alt: 美团 - - logo: /images/home/logo-xiaomi.png - alt: 小米 - - logo: /images/home/logo-jd.png - alt: 京东 - - logo: /images/home/logo-huawei.png - alt: 华为 - - logo: /images/home/logo-baidu.png - alt: 百度 - - logo: /images/home/logo-weibo.png - alt: 新浪微博 - - logo: /images/home/logo-zuoyebang.png - alt: 作业帮 - - logo: /images/home/logo-vipkid.png - alt: Vipkid - - logo: /images/home/logo-360.png - alt: 360 - - logo: /images/home/logo-shopee.png - alt: Shopee - - logo: /images/home/logo-tantan.png - alt: 探探 - - logo: /images/home/logo-kuaishou.png - alt: 快手 - - logo: /images/home/logo-sohu.png - alt: 搜狐 - - logo: /images/home/logo-yidian.png - alt: 一点资讯 - - logo: /images/home/logo-dingdong.png - alt: 叮咚买菜 - - logo: /images/home/logo-youdao.png - alt: 有道 -actionText: Quick Start → -actionLink: /en/get-starting/get-starting -articleText: Latest News -articleLink: /en/article/article-list ---- diff --git a/new-docs/en/admin-manual/http-actions/cancel-label.md b/new-docs/en/admin-manual/http-actions/cancel-label.md deleted file mode 100644 index e60393e021..0000000000 --- a/new-docs/en/admin-manual/http-actions/cancel-label.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -{ - "title": "CANCEL LABEL", - "language": "en" -} ---- - - - -# CANCEL LABEL -## description - NAME: - cancel_label: cancel a transaction with label - - SYNOPSIS - curl -u user:passwd -XPOST http://host:port/api/{db}/_cancel?label={label} - - DESCRIPTION - - This is to cancel a transaction with specified label. - - RETURN VALUES - - Return a JSON format string: - - Status: - Success: cancel succeed - Others: cancel failed - Message: Error message if cancel failed - - ERRORS - -## example - - 1. 
Cancel the transaction with label "testLabel" on database "testDb" - - curl -u root -XPOST http://host:port/api/testDb/_cancel?label=testLabel - -## keyword - - CANCEL, LABEL - - - - - - diff --git a/new-docs/en/admin-manual/http-actions/check-reset-rpc-cache.md b/new-docs/en/admin-manual/http-actions/check-reset-rpc-cache.md deleted file mode 100644 index cbe3137e5c..0000000000 --- a/new-docs/en/admin-manual/http-actions/check-reset-rpc-cache.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -{ - "title": "CHECK/RESET Stub Cache", - "language": "zh-CN" -} ---- - - - -# CHECK/RESET Stub Cache -## description - -### Check Stub Cache - Check whether the connection cache is available - - Description: Check whether the connection cache is available, the maximum load is 10M - METHOD: GET - URI: http://be_host:be_http_port/api/check_rpc_channel/{host_to_check}/{remot_brpc_port}/{payload_size} - -### Reset Stub Cache - This api is used to reset the connection cache of brpc. Endpoints can be in the form of `all` to clear all caches, `host1:port1,host2:port2,...`: clear to the cache of the specified target - - Description: Reset connection cache - METHOD: GET - URI: http://be_host:be_http_port/api/reset_rpc_channel/{endpoints} -## example - - curl -X GET "http://host:port/api/check_rpc_channel/host2/8060/1024000" - curl -X GET "http://host:port/api/reset_rpc_channel/all" - diff --git a/new-docs/en/admin-manual/http-actions/compaction-action.md b/new-docs/en/admin-manual/http-actions/compaction-action.md deleted file mode 100644 index f753cea238..0000000000 --- a/new-docs/en/admin-manual/http-actions/compaction-action.md +++ /dev/null @@ -1,211 +0,0 @@ ---- -{ - "title": "Compaction Action", - "language": "en" -} ---- - - - -# Compaction Action - -This API is used to view the overall compaction status of a BE node or the compaction status of a specified tablet. It can also be used to manually trigger Compaction. - -## View Compaction status - -### The overall compaction status of the node - -``` -curl -X GET http://be_host:webserver_port/api/compaction/run_status -``` - -Return JSON: - -``` -{ - "CumulativeCompaction": { - "/home/disk1" : [10001, 10002], - "/home/disk2" : [10003] - }, - "BaseCompaction": { - "/home/disk1" : [10001, 10002], - "/home/disk2" : [10003] - } -} -``` - -This structure represents the id of the tablet that is performing the compaction task in a certain data directory, and the type of compaction. 
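For a quick summary of this output from the command line, a minimal sketch along the following lines can be used. It is an illustration only: it assumes `jq` is installed, and `BE_HOST`/`BE_HTTP_PORT` are placeholder values rather than anything defined in this patch.

```
#!/usr/bin/env bash
# Sketch: summarize how many tablets are compacting, per compaction type.
# BE_HOST and BE_HTTP_PORT are placeholders; point them at a real BE node.
BE_HOST=127.0.0.1
BE_HTTP_PORT=8040

curl -s "http://${BE_HOST}:${BE_HTTP_PORT}/api/compaction/run_status" \
    | jq -r 'to_entries[] | "\(.key): \([.value[] | length] | add // 0) tablet(s) compacting"'
```

With the example response above, this prints one line per compaction type, e.g. `CumulativeCompaction: 3 tablet(s) compacting`.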
-
-### Specify the compaction status of the tablet
-
-```
-curl -X GET http://be_host:webserver_port/api/compaction/show?tablet_id=xxxx
-```
-
-If the tablet does not exist, an error in JSON format is returned:
-
-```
-{
-    "status": "Fail",
-    "msg": "Tablet not found"
-}
-```
-
-If the tablet exists, the result is returned in JSON format:
-
-```
-{
-    "cumulative policy type": "NUM_BASED",
-    "cumulative point": 50,
-    "last cumulative failure time": "2019-12-16 18:13:43.224",
-    "last base failure time": "2019-12-16 18:13:23.320",
-    "last cumu success time": "2019-12-16 18:12:15.110",
-    "last base success time": "2019-12-16 18:11:50.780",
-    "rowsets": [
-        "[0-48] 10 DATA OVERLAPPING 574.00 MB",
-        "[49-49] 2 DATA OVERLAPPING 574.00 B",
-        "[50-50] 0 DELETE NONOVERLAPPING 574.00 B",
-        "[51-51] 5 DATA OVERLAPPING 574.00 B"
-    ],
-    "missing_rowsets": [],
-    "stale version path": [
-        {
-            "path id": "2",
-            "last create time": "2019-12-16 18:11:15.110 +0800",
-            "path list": "2-> [0-24] -> [25-48]"
-        },
-        {
-            "path id": "1",
-            "last create time": "2019-12-16 18:13:15.110 +0800",
-            "path list": "1-> [25-40] -> [40-48]"
-        }
-    ]
-}
-```
-
-Explanation of results:
-
-* cumulative policy type: The cumulative compaction policy type used by the current tablet.
-* cumulative point: The version boundary between base and cumulative compaction. Versions before the point (exclusive) are handled by base compaction; versions at or after the point (inclusive) are handled by cumulative compaction.
-* last cumulative failure time: The time when the last cumulative compaction failed. After 10 minutes by default, cumulative compaction is attempted on this tablet again.
-* last base failure time: The time when the last base compaction failed. After 10 minutes by default, base compaction is attempted on this tablet again.
-* rowsets: The current rowset collection of this tablet. [0-48] means a rowset covering versions 0-48. The second number is the number of segments in the rowset. `DELETE` indicates a delete version. `OVERLAPPING` and `NONOVERLAPPING` indicate whether data between segments overlaps.
-* missing_rowsets: The missing rowsets.
-* stale version path: The merged version paths of the rowset collection in this tablet. It is an array structure and each element represents a merged path. Each element contains three attributes: path id is the version path id, last create time is the creation time of the most recent rowset on the path, and path list is the list of versions on the path. By default, all rowsets on a path are deleted half an hour after its last create time.
-
-### Examples
-
-```
-curl -X GET http://192.168.10.24:8040/api/compaction/show?tablet_id=10015
-```
-
-## Manually trigger Compaction
-
-```
-curl -X POST http://be_host:webserver_port/api/compaction/run?tablet_id=xxxx\&compact_type=cumulative
-```
-
-Only one manual compaction task can be performed at a time, and the value of `compact_type` is either `base` or `cumulative`.
-
-If the tablet does not exist, an error in JSON format is returned:
-
-```
-{
-    "status": "Fail",
-    "msg": "Tablet not found"
-}
-```
-
-If the compaction task fails to be triggered, an error in JSON format is returned:
-
-```
-{
-    "status": "Fail",
-    "msg": "fail to execute compaction, error = -2000"
-}
-```
-
-If the compaction task is successfully triggered, the result is returned in JSON format:
-
-```
-{
-    "status": "Success",
-    "msg": "compaction task is successfully triggered."
-} -``` - -Explanation of results: - -* status: Trigger task status, when it is successfully triggered, it is Success; when for some reason (for example, the appropriate version is not obtained), it returns Fail. -* msg: Give specific success or failure information. - -### Examples - -``` -curl -X POST http://192.168.10.24:8040/api/compaction/run?tablet_id=10015\&compact_type=cumulative -``` - -## Manual Compaction execution status - -``` -curl -X GET http://be_host:webserver_port/api/compaction/run_status?tablet_id=xxxx -``` -If the tablet does not exist, an error in JSON format is returned: - -``` -{ - "status": "Fail", - "msg": "Tablet not found" -} -``` - -If the tablet exists and the tablet is not running, JSON format is returned: - -``` -{ - "status" : "Success", - "run_status" : false, - "msg" : "this tablet_id is not running", - "tablet_id" : 11308, - "compact_type" : "" -} -``` - -If the tablet exists and the tablet is running, JSON format is returned: - -``` -{ - "status" : "Success", - "run_status" : true, - "msg" : "this tablet_id is running", - "tablet_id" : 11308, - "compact_type" : "cumulative" -} -``` - -Explanation of results: - -* run_status: Get the current manual compaction task execution status. - -### Examples - -``` -curl -X GET http://192.168.10.24:8040/api/compaction/run_status?tablet_id=10015 - diff --git a/new-docs/en/admin-manual/http-actions/connection-action.md b/new-docs/en/admin-manual/http-actions/connection-action.md deleted file mode 100644 index d7d81e0516..0000000000 --- a/new-docs/en/admin-manual/http-actions/connection-action.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -{ - "title": "CONNECTION", - "language": "en" -} ---- - - - -# CONNECTION - -To get current query_id from connection - -``` -curl -X GET http://fe_host:fe_http_port/api/connection?connection_id=123 -``` - -If connection_id does not exist, return 404 NOT FOUND ERROR - -If connection_id exists, return last query_id belongs to connection_id -``` -{ - "query_id" : 9133b7efa92a44c8-8ed4b44772ec2a0c -} -``` diff --git a/new-docs/en/admin-manual/http-actions/fe-get-log-file.md b/new-docs/en/admin-manual/http-actions/fe-get-log-file.md deleted file mode 100644 index 5a7595ad58..0000000000 --- a/new-docs/en/admin-manual/http-actions/fe-get-log-file.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -{ - "title": "get\\_log\\_file", - "language": "en" -} ---- - - - -# get\_log\_file - -To get FE log via HTTP - -## Types of FE log - -1. fe.audit.log (Audit log) - - The audit log records the all statements executed. Audit log's name format as follow: - - ``` - fe.audit.log # The latest audit log - fe.audit.log.20190603.1 # The historical audit log. The smaller the sequence number, the newer the log. - fe.audit.log.20190603.2 - fe.audit.log.20190602.1 - ... - ``` - -## Example - -1. Get the list of specified type of logs - - Example - - `curl -v -X HEAD -uuser:passwd http://fe_host:http_port/api/get_log_file?type=fe.audit.log` - - Returns: - - ``` - HTTP/1.1 200 OK - file_infos: {"fe.audit.log":24759,"fe.audit.log.20190528.1":132934} - content-type: text/html - connection: keep-alive - ``` - - In the header of result, the `file_infos` section saves the file list and file size in JSON format. - -2. Download files - - Example: - - ``` - curl -X GET -uuser:passwd http://fe_host:http_port/api/get_log_file?type=fe.audit.log\&file=fe.audit.log.20190528.1 - ``` - -## Notification - -Need ADMIN privilege. 
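The two requests above can be combined into one small helper. The following is a sketch only, with placeholder host, port, credentials and file name (nothing here is defined by this patch), and it requires an account with ADMIN privilege.

```
#!/usr/bin/env bash
# Sketch: list audit log files on an FE node, then download one of them.
# FE_HOST, FE_HTTP_PORT, AUTH and LOG_FILE are placeholders.
FE_HOST=127.0.0.1
FE_HTTP_PORT=8030
AUTH=user:passwd
LOG_FILE=fe.audit.log.20190528.1

# HEAD request: the available files and their sizes come back in the file_infos header.
curl -s -I -u "${AUTH}" \
    "http://${FE_HOST}:${FE_HTTP_PORT}/api/get_log_file?type=fe.audit.log" \
    | grep -i '^file_infos'

# GET request: download the chosen file to the local disk.
curl -s -u "${AUTH}" -o "${LOG_FILE}" \
    "http://${FE_HOST}:${FE_HTTP_PORT}/api/get_log_file?type=fe.audit.log&file=${LOG_FILE}"
```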
diff --git a/new-docs/en/admin-manual/http-actions/fe/backends-action.md b/new-docs/en/admin-manual/http-actions/fe/backends-action.md deleted file mode 100644 index 17589dd95d..0000000000 --- a/new-docs/en/admin-manual/http-actions/fe/backends-action.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -{ - "title": "Backends Action", - "language": "zh-CN" -} ---- - - - -# Backends Action - -## Request - -``` -GET /api/backends -``` - -## Description - -Backends Action returns the Backends list, including Backend's IP, PORT and other information. - -## Path parameters - -None - -## Query parameters - -* `is_alive` - - Optional parameters. Whether to return the surviving BE nodes. The default is false, which means that all BE nodes are returned. - -## Request body - -None - -## Response - -``` -{ - "msg": "success", - "code": 0, - "data": { - "backends": [ - { - "ip": "192.1.1.1", - "http_port": 8040, - "is_alive": true - } - ] - }, - "count": 0 -} -``` diff --git a/new-docs/en/admin-manual/http-actions/fe/bootstrap-action.md b/new-docs/en/admin-manual/http-actions/fe/bootstrap-action.md deleted file mode 100644 index 1f30ca3498..0000000000 --- a/new-docs/en/admin-manual/http-actions/fe/bootstrap-action.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -{ - "title": "Bootstrap Action", - "language": "en" -} ---- - - - -# Bootstrap Action - -## Request - -`GET /api/bootstrap` - -## Description - -It is used to judge whether the FE has started. When no parameters are provided, only whether the startup is successful is returned. If `token` and `cluster_id` are provided, more detailed information is returned. - -## Path parameters - -none - -## Query parameters - -* `cluster_id` - - The cluster id. It can be viewed in the file `palo-meta/image/VERSION`. - -* `token` - - Cluster token. It can be viewed in the file `palo-meta/image/VERSION`. - -## Request body - -none - -## Response - -* No parameters provided - - ``` - { - "msg": "OK", - "code": 0, - "data": null, - "count": 0 - } - ``` - - A code of 0 means that the FE node has started successfully. Error codes other than 0 indicate other errors. - -* Provide `token` and `cluster_id` - - ``` - { - "msg": "OK", - "code": 0, - "data": { - "queryPort": 9030, - "rpcPort": 9020, - "maxReplayedJournal": 17287 - }, - "count": 0 - } - ``` - - * `queryPort` is the MySQL protocol port of the FE node. - * `rpcPort` is the thrift RPC port of the FE node. - * `maxReplayedJournal` represents the maximum metadata journal id currently played back by the FE node. - -## Examples - -1. No parameters - - ``` - GET /api/bootstrap - - Response: - { - "msg": "OK", - "code": 0, - "data": null, - "count": 0 - } - ``` - -2. Provide `token` and `cluster_id` - - ``` - GET /api/bootstrap?cluster_id=935437471&token=ad87f6dd-c93f-4880-bcdb-8ca8c9ab3031 - - Response: - { - "msg": "OK", - "code": 0, - "data": { - "queryPort": 9030, - "rpcPort": 9020, - "maxReplayedJournal": 17287 - }, - "count": 0 - } - ``` \ No newline at end of file diff --git a/new-docs/en/admin-manual/http-actions/fe/cancel-load-action.md b/new-docs/en/admin-manual/http-actions/fe/cancel-load-action.md deleted file mode 100644 index 1ca4f5fcd7..0000000000 --- a/new-docs/en/admin-manual/http-actions/fe/cancel-load-action.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -{ - "title": "Cancel Load Action", - "language": "en" -} ---- - - - -# Cancel Load Action - -## Request - -`POST /api//_cancel` - -## Description - -Used to cancel the load transaction of the specified label. 
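A hedged end-to-end sketch of this call, mirroring the `CANCEL LABEL` example earlier in this patch, is shown below; the host, port, credentials, database and label are all placeholder values.

```
# Sketch: cancel the load transaction labelled "example_label" in database "example_db".
# Every value here is a placeholder, not taken from this patch.
curl -s -u user:passwd -X POST \
    "http://127.0.0.1:8030/api/example_db/_cancel?label=example_label"
```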
- -## Path parameters - -* `<db>` - -    Specify the database name - -## Query parameters - -* `